encoding.c
1 /* 2 * encoding.c : implements the encoding conversion functions needed for XML 3 * 4 * Related specs: 5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies 6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau 7 * [ISO-10646] UTF-8 and UTF-16 in Annexes 8 * [ISO-8859-1] ISO Latin-1 characters codes. 9 * [UNICODE] The Unicode Consortium, "The Unicode Standard -- 10 * Worldwide Character Encoding -- Version 1.0", Addison- 11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is 12 * described in Unicode Technical Report #4. 13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for 14 * Information Interchange, ANSI X3.4-1986. 15 * 16 * See Copyright for the status of this software. 17 * 18 * daniel@veillard.com 19 * 20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org> 21 */ 22 23 #define IN_LIBXML 24 #include "libxml.h" 25 26 #include <string.h> 27 #include <limits.h> 28 29 #ifdef HAVE_CTYPE_H 30 #include <ctype.h> 31 #endif 32 #ifdef HAVE_STDLIB_H 33 #include <stdlib.h> 34 #endif 35 #ifdef LIBXML_ICONV_ENABLED 36 #ifdef HAVE_ERRNO_H 37 #include <errno.h> 38 #endif 39 #endif 40 #include <libxml/encoding.h> 41 #include <libxml/xmlmemory.h> 42 #ifdef LIBXML_HTML_ENABLED 43 #include <libxml/HTMLparser.h> 44 #endif 45 #include <libxml/globals.h> 46 #include <libxml/xmlerror.h> 47 48 #ifdef LIBXML_ICU_ENABLED 49 #include <unicode/ucnv.h> 50 #endif 51 52 #include "buf.h" 53 #include "enc.h" 54 55 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; 56 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; 57 58 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias; 59 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr; 60 struct _xmlCharEncodingAlias { 61 const char *name; 62 const char *alias; 63 }; 64 65 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL; 66 static int xmlCharEncodingAliasesNb = 0; 67 static int xmlCharEncodingAliasesMax = 0; 68 69 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED) 70 #if 0 71 #define DEBUG_ENCODING /* Define this to get encoding traces */ 72 #endif 73 #else 74 #ifdef LIBXML_ISO8859X_ENABLED 75 static void xmlRegisterCharEncodingHandlersISO8859x (void); 76 #endif 77 #endif 78 79 static int xmlLittleEndian = 1; 80 81 /** 82 * xmlEncodingErrMemory: 83 * @extra: extra informations 84 * 85 * Handle an out of memory condition 86 */ 87 static void 88 xmlEncodingErrMemory(const char *extra) 89 { 90 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra); 91 } 92 93 /** 94 * xmlErrEncoding: 95 * @error: the error number 96 * @msg: the error message 97 * 98 * n encoding error 99 */ 100 static void LIBXML_ATTR_FORMAT(2,0) 101 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val) 102 { 103 #pragma clang diagnostic push 104 #pragma clang diagnostic ignored "-Wformat-nonliteral" 105 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, 106 XML_FROM_I18N, error, XML_ERR_FATAL, 107 NULL, 0, val, NULL, NULL, 0, 0, msg, val); 108 #pragma clang diagnostic pop 109 } 110 111 #ifdef LIBXML_ICU_ENABLED 112 static uconv_t* 113 openIcuConverter(const char* name, int toUnicode) 114 { 115 UErrorCode status = U_ZERO_ERROR; 116 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t)); 117 if (conv == NULL) 118 return NULL; 119 120 conv->uconv = ucnv_open(name, &status); 121 if (U_FAILURE(status)) 122 goto error; 123 124 status = U_ZERO_ERROR; 125 if (toUnicode) { 126 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP, 127 NULL, NULL, NULL, &status); 128 } 129 else { 130 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP, 131 NULL, NULL, NULL, &status); 132 } 133 if (U_FAILURE(status)) 134 goto error; 135 136 status = U_ZERO_ERROR; 137 conv->utf8 = ucnv_open("UTF-8", &status); 138 if (U_SUCCESS(status)) 139 return conv; 140 141 error: 142 if (conv->uconv) 143 ucnv_close(conv->uconv); 144 xmlFree(conv); 145 return NULL; 146 } 147 148 static void 149 closeIcuConverter(uconv_t *conv) 150 { 151 if (conv != NULL) { 152 ucnv_close(conv->uconv); 153 ucnv_close(conv->utf8); 154 xmlFree(conv); 155 } 156 } 157 #endif /* LIBXML_ICU_ENABLED */ 158 159 /************************************************************************ 160 * * 161 * Conversions To/From UTF8 encoding * 162 * * 163 ************************************************************************/ 164 165 /** 166 * asciiToUTF8: 167 * @out: a pointer to an array of bytes to store the result 168 * @outlen: the length of @out 169 * @in: a pointer to an array of ASCII chars 170 * @inlen: the length of @in 171 * 172 * Take a block of ASCII chars in and try to convert it to an UTF-8 173 * block of chars out. 174 * Returns 0 if success, or -1 otherwise 175 * The value of @inlen after return is the number of octets consumed 176 * if the return value is positive, else unpredictable. 177 * The value of @outlen after return is the number of octets consumed. 178 */ 179 static int 180 asciiToUTF8(unsigned char* out, int *outlen, 181 const unsigned char* in, int *inlen) { 182 unsigned char* outstart = out; 183 const unsigned char* base = in; 184 const unsigned char* processed = in; 185 unsigned char* outend = out + *outlen; 186 const unsigned char* inend; 187 unsigned int c; 188 189 inend = in + (*inlen); 190 while ((in < inend) && (out - outstart + 5 < *outlen)) { 191 c= *in++; 192 193 if (out >= outend) 194 break; 195 if (c < 0x80) { 196 *out++ = c; 197 } else { 198 *outlen = out - outstart; 199 *inlen = processed - base; 200 return(-1); 201 } 202 203 processed = (const unsigned char*) in; 204 } 205 *outlen = out - outstart; 206 *inlen = processed - base; 207 return(*outlen); 208 } 209 210 #ifdef LIBXML_OUTPUT_ENABLED 211 /** 212 * UTF8Toascii: 213 * @out: a pointer to an array of bytes to store the result 214 * @outlen: the length of @out 215 * @in: a pointer to an array of UTF-8 chars 216 * @inlen: the length of @in 217 * 218 * Take a block of UTF-8 chars in and try to convert it to an ASCII 219 * block of chars out. 220 * 221 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 222 * The value of @inlen after return is the number of octets consumed 223 * if the return value is positive, else unpredictable. 224 * The value of @outlen after return is the number of octets consumed. 225 */ 226 static int 227 UTF8Toascii(unsigned char* out, int *outlen, 228 const unsigned char* in, int *inlen) { 229 const unsigned char* processed = in; 230 const unsigned char* outend; 231 const unsigned char* outstart = out; 232 const unsigned char* instart = in; 233 const unsigned char* inend; 234 unsigned int c, d; 235 int trailing; 236 237 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 238 if (in == NULL) { 239 /* 240 * initialization nothing to do 241 */ 242 *outlen = 0; 243 *inlen = 0; 244 return(0); 245 } 246 inend = in + (*inlen); 247 outend = out + (*outlen); 248 while (in < inend) { 249 d = *in++; 250 if (d < 0x80) { c= d; trailing= 0; } 251 else if (d < 0xC0) { 252 /* trailing byte in leading position */ 253 *outlen = out - outstart; 254 *inlen = processed - instart; 255 return(-2); 256 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 257 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 258 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 259 else { 260 /* no chance for this in Ascii */ 261 *outlen = out - outstart; 262 *inlen = processed - instart; 263 return(-2); 264 } 265 266 if (inend - in < trailing) { 267 break; 268 } 269 270 for ( ; trailing; trailing--) { 271 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 272 break; 273 c <<= 6; 274 c |= d & 0x3F; 275 } 276 277 /* assertion: c is a single UTF-4 value */ 278 if (c < 0x80) { 279 if (out >= outend) 280 break; 281 *out++ = c; 282 } else { 283 /* no chance for this in Ascii */ 284 *outlen = out - outstart; 285 *inlen = processed - instart; 286 return(-2); 287 } 288 processed = in; 289 } 290 *outlen = out - outstart; 291 *inlen = processed - instart; 292 return(*outlen); 293 } 294 #endif /* LIBXML_OUTPUT_ENABLED */ 295 296 /** 297 * isolat1ToUTF8: 298 * @out: a pointer to an array of bytes to store the result 299 * @outlen: the length of @out 300 * @in: a pointer to an array of ISO Latin 1 chars 301 * @inlen: the length of @in 302 * 303 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8 304 * block of chars out. 305 * Returns the number of bytes written if success, or -1 otherwise 306 * The value of @inlen after return is the number of octets consumed 307 * if the return value is positive, else unpredictable. 308 * The value of @outlen after return is the number of octets consumed. 309 */ 310 int 311 isolat1ToUTF8(unsigned char* out, int *outlen, 312 const unsigned char* in, int *inlen) { 313 unsigned char* outstart = out; 314 const unsigned char* base = in; 315 unsigned char* outend; 316 const unsigned char* inend; 317 const unsigned char* instop; 318 319 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL)) 320 return(-1); 321 322 outend = out + *outlen; 323 inend = in + (*inlen); 324 instop = inend; 325 326 while ((in < inend) && (out < outend - 1)) { 327 if (*in >= 0x80) { 328 *out++ = (((*in) >> 6) & 0x1F) | 0xC0; 329 *out++ = ((*in) & 0x3F) | 0x80; 330 ++in; 331 } 332 if ((instop - in) > (outend - out)) instop = in + (outend - out); 333 while ((in < instop) && (*in < 0x80)) { 334 *out++ = *in++; 335 } 336 } 337 if ((in < inend) && (out < outend) && (*in < 0x80)) { 338 *out++ = *in++; 339 } 340 *outlen = out - outstart; 341 *inlen = in - base; 342 return(*outlen); 343 } 344 345 /** 346 * UTF8ToUTF8: 347 * @out: a pointer to an array of bytes to store the result 348 * @outlen: the length of @out 349 * @inb: a pointer to an array of UTF-8 chars 350 * @inlenb: the length of @in in UTF-8 chars 351 * 352 * No op copy operation for UTF8 handling. 353 * 354 * Returns the number of bytes written, or -1 if lack of space. 355 * The value of *inlen after return is the number of octets consumed 356 * if the return value is positive, else unpredictable. 357 */ 358 static int 359 UTF8ToUTF8(unsigned char* out, int *outlen, 360 const unsigned char* inb, int *inlenb) 361 { 362 int len; 363 364 if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL)) 365 return(-1); 366 if (*outlen > *inlenb) { 367 len = *inlenb; 368 } else { 369 len = *outlen; 370 } 371 if (len < 0) 372 return(-1); 373 374 memcpy(out, inb, len); 375 376 *outlen = len; 377 *inlenb = len; 378 return(*outlen); 379 } 380 381 382 #ifdef LIBXML_OUTPUT_ENABLED 383 /** 384 * UTF8Toisolat1: 385 * @out: a pointer to an array of bytes to store the result 386 * @outlen: the length of @out 387 * @in: a pointer to an array of UTF-8 chars 388 * @inlen: the length of @in 389 * 390 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 391 * block of chars out. 392 * 393 * Returns the number of bytes written if success, -2 if the transcoding fails, 394 or -1 otherwise 395 * The value of @inlen after return is the number of octets consumed 396 * if the return value is positive, else unpredictable. 397 * The value of @outlen after return is the number of octets consumed. 398 */ 399 int 400 UTF8Toisolat1(unsigned char* out, int *outlen, 401 const unsigned char* in, int *inlen) { 402 const unsigned char* processed = in; 403 const unsigned char* outend; 404 const unsigned char* outstart = out; 405 const unsigned char* instart = in; 406 const unsigned char* inend; 407 unsigned int c, d; 408 int trailing; 409 410 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 411 if (in == NULL) { 412 /* 413 * initialization nothing to do 414 */ 415 *outlen = 0; 416 *inlen = 0; 417 return(0); 418 } 419 inend = in + (*inlen); 420 outend = out + (*outlen); 421 while (in < inend) { 422 d = *in++; 423 if (d < 0x80) { c= d; trailing= 0; } 424 else if (d < 0xC0) { 425 /* trailing byte in leading position */ 426 *outlen = out - outstart; 427 *inlen = processed - instart; 428 return(-2); 429 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 430 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 431 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 432 else { 433 /* no chance for this in IsoLat1 */ 434 *outlen = out - outstart; 435 *inlen = processed - instart; 436 return(-2); 437 } 438 439 if (inend - in < trailing) { 440 break; 441 } 442 443 for ( ; trailing; trailing--) { 444 if (in >= inend) 445 break; 446 if (((d= *in++) & 0xC0) != 0x80) { 447 *outlen = out - outstart; 448 *inlen = processed - instart; 449 return(-2); 450 } 451 c <<= 6; 452 c |= d & 0x3F; 453 } 454 455 /* assertion: c is a single UTF-4 value */ 456 if (c <= 0xFF) { 457 if (out >= outend) 458 break; 459 *out++ = c; 460 } else { 461 /* no chance for this in IsoLat1 */ 462 *outlen = out - outstart; 463 *inlen = processed - instart; 464 return(-2); 465 } 466 processed = in; 467 } 468 *outlen = out - outstart; 469 *inlen = processed - instart; 470 return(*outlen); 471 } 472 #endif /* LIBXML_OUTPUT_ENABLED */ 473 474 /** 475 * UTF16LEToUTF8: 476 * @out: a pointer to an array of bytes to store the result 477 * @outlen: the length of @out 478 * @inb: a pointer to an array of UTF-16LE passwd as a byte array 479 * @inlenb: the length of @in in UTF-16LE chars 480 * 481 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8 482 * block of chars out. This function assumes the endian property 483 * is the same between the native type of this machine and the 484 * inputed one. 485 * 486 * Returns the number of bytes written, or -1 if lack of space, or -2 487 * if the transcoding fails (if *in is not a valid utf16 string) 488 * The value of *inlen after return is the number of octets consumed 489 * if the return value is positive, else unpredictable. 490 */ 491 static int 492 UTF16LEToUTF8(unsigned char* out, int *outlen, 493 const unsigned char* inb, int *inlenb) 494 { 495 unsigned char* outstart = out; 496 const unsigned char* processed = inb; 497 unsigned char* outend; 498 unsigned short* in = (unsigned short*) inb; 499 unsigned short* inend; 500 unsigned int c, d, inlen; 501 unsigned char *tmp; 502 int bits; 503 504 if (*outlen == 0) { 505 *inlenb = 0; 506 return(0); 507 } 508 outend = out + *outlen; 509 if ((*inlenb % 2) == 1) 510 (*inlenb)--; 511 inlen = *inlenb / 2; 512 inend = in + inlen; 513 while ((in < inend) && (out - outstart + 5 < *outlen)) { 514 if (xmlLittleEndian) { 515 c= *in++; 516 } else { 517 tmp = (unsigned char *) in; 518 c = *tmp++; 519 c = c | (((unsigned int)*tmp) << 8); 520 in++; 521 } 522 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 523 if (in >= inend) { /* (in > inend) shouldn't happens */ 524 break; 525 } 526 if (xmlLittleEndian) { 527 d = *in++; 528 } else { 529 tmp = (unsigned char *) in; 530 d = *tmp++; 531 d = d | (((unsigned int)*tmp) << 8); 532 in++; 533 } 534 if ((d & 0xFC00) == 0xDC00) { 535 c &= 0x03FF; 536 c <<= 10; 537 c |= d & 0x03FF; 538 c += 0x10000; 539 } 540 else { 541 *outlen = out - outstart; 542 *inlenb = processed - inb; 543 return(-2); 544 } 545 } 546 547 /* assertion: c is a single UTF-4 value */ 548 if (out >= outend) 549 break; 550 if (c < 0x80) { *out++= c; bits= -6; } 551 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 552 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 553 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 554 555 for ( ; bits >= 0; bits-= 6) { 556 if (out >= outend) 557 break; 558 *out++= ((c >> bits) & 0x3F) | 0x80; 559 } 560 processed = (const unsigned char*) in; 561 } 562 *outlen = out - outstart; 563 *inlenb = processed - inb; 564 return(*outlen); 565 } 566 567 #ifdef LIBXML_OUTPUT_ENABLED 568 /** 569 * UTF8ToUTF16LE: 570 * @outb: a pointer to an array of bytes to store the result 571 * @outlen: the length of @outb 572 * @in: a pointer to an array of UTF-8 chars 573 * @inlen: the length of @in 574 * 575 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE 576 * block of chars out. 577 * 578 * Returns the number of bytes written, or -1 if lack of space, or -2 579 * if the transcoding failed. 580 */ 581 static int 582 UTF8ToUTF16LE(unsigned char* outb, int *outlen, 583 const unsigned char* in, int *inlen) 584 { 585 unsigned short* out = (unsigned short*) outb; 586 const unsigned char* processed = in; 587 const unsigned char *const instart = in; 588 unsigned short* outstart= out; 589 unsigned short* outend; 590 const unsigned char* inend; 591 unsigned int c, d; 592 int trailing; 593 unsigned char *tmp; 594 unsigned short tmp1, tmp2; 595 596 /* UTF16LE encoding has no BOM */ 597 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 598 if (in == NULL) { 599 *outlen = 0; 600 *inlen = 0; 601 return(0); 602 } 603 inend= in + *inlen; 604 outend = out + (*outlen / 2); 605 while (in < inend) { 606 d= *in++; 607 if (d < 0x80) { c= d; trailing= 0; } 608 else if (d < 0xC0) { 609 /* trailing byte in leading position */ 610 *outlen = (out - outstart) * 2; 611 *inlen = processed - instart; 612 return(-2); 613 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 614 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 615 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 616 else { 617 /* no chance for this in UTF-16 */ 618 *outlen = (out - outstart) * 2; 619 *inlen = processed - instart; 620 return(-2); 621 } 622 623 if (inend - in < trailing) { 624 break; 625 } 626 627 for ( ; trailing; trailing--) { 628 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) 629 break; 630 c <<= 6; 631 c |= d & 0x3F; 632 } 633 634 /* assertion: c is a single UTF-4 value */ 635 if (c < 0x10000) { 636 if (out >= outend) 637 break; 638 if (xmlLittleEndian) { 639 *out++ = c; 640 } else { 641 tmp = (unsigned char *) out; 642 *tmp = c ; 643 *(tmp + 1) = c >> 8 ; 644 out++; 645 } 646 } 647 else if (c < 0x110000) { 648 if (out+1 >= outend) 649 break; 650 c -= 0x10000; 651 if (xmlLittleEndian) { 652 *out++ = 0xD800 | (c >> 10); 653 *out++ = 0xDC00 | (c & 0x03FF); 654 } else { 655 tmp1 = 0xD800 | (c >> 10); 656 tmp = (unsigned char *) out; 657 *tmp = (unsigned char) tmp1; 658 *(tmp + 1) = tmp1 >> 8; 659 out++; 660 661 tmp2 = 0xDC00 | (c & 0x03FF); 662 tmp = (unsigned char *) out; 663 *tmp = (unsigned char) tmp2; 664 *(tmp + 1) = tmp2 >> 8; 665 out++; 666 } 667 } 668 else 669 break; 670 processed = in; 671 } 672 *outlen = (out - outstart) * 2; 673 *inlen = processed - instart; 674 return(*outlen); 675 } 676 677 /** 678 * UTF8ToUTF16: 679 * @outb: a pointer to an array of bytes to store the result 680 * @outlen: the length of @outb 681 * @in: a pointer to an array of UTF-8 chars 682 * @inlen: the length of @in 683 * 684 * Take a block of UTF-8 chars in and try to convert it to an UTF-16 685 * block of chars out. 686 * 687 * Returns the number of bytes written, or -1 if lack of space, or -2 688 * if the transcoding failed. 689 */ 690 static int 691 UTF8ToUTF16(unsigned char* outb, int *outlen, 692 const unsigned char* in, int *inlen) 693 { 694 if (in == NULL) { 695 /* 696 * initialization, add the Byte Order Mark for UTF-16LE 697 */ 698 if (*outlen >= 2) { 699 outb[0] = 0xFF; 700 outb[1] = 0xFE; 701 *outlen = 2; 702 *inlen = 0; 703 #ifdef DEBUG_ENCODING 704 xmlGenericError(xmlGenericErrorContext, 705 "Added FFFE Byte Order Mark\n"); 706 #endif 707 return(2); 708 } 709 *outlen = 0; 710 *inlen = 0; 711 return(0); 712 } 713 return (UTF8ToUTF16LE(outb, outlen, in, inlen)); 714 } 715 #endif /* LIBXML_OUTPUT_ENABLED */ 716 717 /** 718 * UTF16BEToUTF8: 719 * @out: a pointer to an array of bytes to store the result 720 * @outlen: the length of @out 721 * @inb: a pointer to an array of UTF-16 passed as a byte array 722 * @inlenb: the length of @in in UTF-16 chars 723 * 724 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8 725 * block of chars out. This function assumes the endian property 726 * is the same between the native type of this machine and the 727 * inputed one. 728 * 729 * Returns the number of bytes written, or -1 if lack of space, or -2 730 * if the transcoding fails (if *in is not a valid utf16 string) 731 * The value of *inlen after return is the number of octets consumed 732 * if the return value is positive, else unpredictable. 733 */ 734 static int 735 UTF16BEToUTF8(unsigned char* out, int *outlen, 736 const unsigned char* inb, int *inlenb) 737 { 738 unsigned char* outstart = out; 739 const unsigned char* processed = inb; 740 unsigned char* outend = out + *outlen; 741 unsigned short* in = (unsigned short*) inb; 742 unsigned short* inend; 743 unsigned int c, d, inlen; 744 unsigned char *tmp; 745 int bits; 746 747 if ((*inlenb % 2) == 1) 748 (*inlenb)--; 749 inlen = *inlenb / 2; 750 inend= in + inlen; 751 while (in < inend) { 752 if (xmlLittleEndian) { 753 tmp = (unsigned char *) in; 754 c = *tmp++; 755 c = c << 8; 756 c = c | (unsigned int) *tmp; 757 in++; 758 } else { 759 c= *in++; 760 } 761 if ((c & 0xFC00) == 0xD800) { /* surrogates */ 762 if (in >= inend) { /* (in > inend) shouldn't happens */ 763 *outlen = out - outstart; 764 *inlenb = processed - inb; 765 return(-2); 766 } 767 if (xmlLittleEndian) { 768 tmp = (unsigned char *) in; 769 d = *tmp++; 770 d = d << 8; 771 d = d | (unsigned int) *tmp; 772 in++; 773 } else { 774 d= *in++; 775 } 776 if ((d & 0xFC00) == 0xDC00) { 777 c &= 0x03FF; 778 c <<= 10; 779 c |= d & 0x03FF; 780 c += 0x10000; 781 } 782 else { 783 *outlen = out - outstart; 784 *inlenb = processed - inb; 785 return(-2); 786 } 787 } 788 789 /* assertion: c is a single UTF-4 value */ 790 if (out >= outend) 791 break; 792 if (c < 0x80) { *out++= c; bits= -6; } 793 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } 794 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; } 795 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; } 796 797 for ( ; bits >= 0; bits-= 6) { 798 if (out >= outend) 799 break; 800 *out++= ((c >> bits) & 0x3F) | 0x80; 801 } 802 processed = (const unsigned char*) in; 803 } 804 *outlen = out - outstart; 805 *inlenb = processed - inb; 806 return(*outlen); 807 } 808 809 #ifdef LIBXML_OUTPUT_ENABLED 810 /** 811 * UTF8ToUTF16BE: 812 * @outb: a pointer to an array of bytes to store the result 813 * @outlen: the length of @outb 814 * @in: a pointer to an array of UTF-8 chars 815 * @inlen: the length of @in 816 * 817 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE 818 * block of chars out. 819 * 820 * Returns the number of byte written, or -1 by lack of space, or -2 821 * if the transcoding failed. 822 */ 823 static int 824 UTF8ToUTF16BE(unsigned char* outb, int *outlen, 825 const unsigned char* in, int *inlen) 826 { 827 unsigned short* out = (unsigned short*) outb; 828 const unsigned char* processed = in; 829 const unsigned char *const instart = in; 830 unsigned short* outstart= out; 831 unsigned short* outend; 832 const unsigned char* inend; 833 unsigned int c, d; 834 int trailing; 835 unsigned char *tmp; 836 unsigned short tmp1, tmp2; 837 838 /* UTF-16BE has no BOM */ 839 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1); 840 if (in == NULL) { 841 *outlen = 0; 842 *inlen = 0; 843 return(0); 844 } 845 inend= in + *inlen; 846 outend = out + (*outlen / 2); 847 while (in < inend) { 848 d= *in++; 849 if (d < 0x80) { c= d; trailing= 0; } 850 else if (d < 0xC0) { 851 /* trailing byte in leading position */ 852 *outlen = out - outstart; 853 *inlen = processed - instart; 854 return(-2); 855 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } 856 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } 857 else if (d < 0xF8) { c= d & 0x07; trailing= 3; } 858 else { 859 /* no chance for this in UTF-16 */ 860 *outlen = out - outstart; 861 *inlen = processed - instart; 862 return(-2); 863 } 864 865 if (inend - in < trailing) { 866 break; 867 } 868 869 for ( ; trailing; trailing--) { 870 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break; 871 c <<= 6; 872 c |= d & 0x3F; 873 } 874 875 /* assertion: c is a single UTF-4 value */ 876 if (c < 0x10000) { 877 if (out >= outend) break; 878 if (xmlLittleEndian) { 879 tmp = (unsigned char *) out; 880 *tmp = c >> 8; 881 *(tmp + 1) = c; 882 out++; 883 } else { 884 *out++ = c; 885 } 886 } 887 else if (c < 0x110000) { 888 if (out+1 >= outend) break; 889 c -= 0x10000; 890 if (xmlLittleEndian) { 891 tmp1 = 0xD800 | (c >> 10); 892 tmp = (unsigned char *) out; 893 *tmp = tmp1 >> 8; 894 *(tmp + 1) = (unsigned char) tmp1; 895 out++; 896 897 tmp2 = 0xDC00 | (c & 0x03FF); 898 tmp = (unsigned char *) out; 899 *tmp = tmp2 >> 8; 900 *(tmp + 1) = (unsigned char) tmp2; 901 out++; 902 } else { 903 *out++ = 0xD800 | (c >> 10); 904 *out++ = 0xDC00 | (c & 0x03FF); 905 } 906 } 907 else 908 break; 909 processed = in; 910 } 911 *outlen = (out - outstart) * 2; 912 *inlen = processed - instart; 913 return(*outlen); 914 } 915 #endif /* LIBXML_OUTPUT_ENABLED */ 916 917 /************************************************************************ 918 * * 919 * Generic encoding handling routines * 920 * * 921 ************************************************************************/ 922 923 /** 924 * xmlDetectCharEncoding: 925 * @in: a pointer to the first bytes of the XML entity, must be at least 926 * 2 bytes long (at least 4 if encoding is UTF4 variant). 927 * @len: pointer to the length of the buffer 928 * 929 * Guess the encoding of the entity using the first bytes of the entity content 930 * according to the non-normative appendix F of the XML-1.0 recommendation. 931 * 932 * Returns one of the XML_CHAR_ENCODING_... values. 933 */ 934 xmlCharEncoding 935 xmlDetectCharEncoding(const unsigned char* in, int len) 936 { 937 if (in == NULL) 938 return(XML_CHAR_ENCODING_NONE); 939 if (len >= 4) { 940 if ((in[0] == 0x00) && (in[1] == 0x00) && 941 (in[2] == 0x00) && (in[3] == 0x3C)) 942 return(XML_CHAR_ENCODING_UCS4BE); 943 if ((in[0] == 0x3C) && (in[1] == 0x00) && 944 (in[2] == 0x00) && (in[3] == 0x00)) 945 return(XML_CHAR_ENCODING_UCS4LE); 946 if ((in[0] == 0x00) && (in[1] == 0x00) && 947 (in[2] == 0x3C) && (in[3] == 0x00)) 948 return(XML_CHAR_ENCODING_UCS4_2143); 949 if ((in[0] == 0x00) && (in[1] == 0x3C) && 950 (in[2] == 0x00) && (in[3] == 0x00)) 951 return(XML_CHAR_ENCODING_UCS4_3412); 952 if ((in[0] == 0x4C) && (in[1] == 0x6F) && 953 (in[2] == 0xA7) && (in[3] == 0x94)) 954 return(XML_CHAR_ENCODING_EBCDIC); 955 if ((in[0] == 0x3C) && (in[1] == 0x3F) && 956 (in[2] == 0x78) && (in[3] == 0x6D)) 957 return(XML_CHAR_ENCODING_UTF8); 958 /* 959 * Although not part of the recommendation, we also 960 * attempt an "auto-recognition" of UTF-16LE and 961 * UTF-16BE encodings. 962 */ 963 if ((in[0] == 0x3C) && (in[1] == 0x00) && 964 (in[2] == 0x3F) && (in[3] == 0x00)) 965 return(XML_CHAR_ENCODING_UTF16LE); 966 if ((in[0] == 0x00) && (in[1] == 0x3C) && 967 (in[2] == 0x00) && (in[3] == 0x3F)) 968 return(XML_CHAR_ENCODING_UTF16BE); 969 } 970 if (len >= 3) { 971 /* 972 * Errata on XML-1.0 June 20 2001 973 * We now allow an UTF8 encoded BOM 974 */ 975 if ((in[0] == 0xEF) && (in[1] == 0xBB) && 976 (in[2] == 0xBF)) 977 return(XML_CHAR_ENCODING_UTF8); 978 } 979 /* For UTF-16 we can recognize by the BOM */ 980 if (len >= 2) { 981 if ((in[0] == 0xFE) && (in[1] == 0xFF)) 982 return(XML_CHAR_ENCODING_UTF16BE); 983 if ((in[0] == 0xFF) && (in[1] == 0xFE)) 984 return(XML_CHAR_ENCODING_UTF16LE); 985 } 986 return(XML_CHAR_ENCODING_NONE); 987 } 988 989 /** 990 * xmlCleanupEncodingAliases: 991 * 992 * Unregisters all aliases 993 */ 994 void 995 xmlCleanupEncodingAliases(void) { 996 int i; 997 998 if (xmlCharEncodingAliases == NULL) 999 return; 1000 1001 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1002 if (xmlCharEncodingAliases[i].name != NULL) 1003 xmlFree((char *) xmlCharEncodingAliases[i].name); 1004 if (xmlCharEncodingAliases[i].alias != NULL) 1005 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1006 } 1007 xmlCharEncodingAliasesNb = 0; 1008 xmlCharEncodingAliasesMax = 0; 1009 xmlFree(xmlCharEncodingAliases); 1010 xmlCharEncodingAliases = NULL; 1011 } 1012 1013 /** 1014 * xmlGetEncodingAlias: 1015 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1016 * 1017 * Lookup an encoding name for the given alias. 1018 * 1019 * Returns NULL if not found, otherwise the original name 1020 */ 1021 const char * 1022 xmlGetEncodingAlias(const char *alias) { 1023 int i; 1024 char upper[100]; 1025 1026 if (alias == NULL) 1027 return(NULL); 1028 1029 if (xmlCharEncodingAliases == NULL) 1030 return(NULL); 1031 1032 for (i = 0;i < 99;i++) { 1033 upper[i] = toupper(alias[i]); 1034 if (upper[i] == 0) break; 1035 } 1036 upper[i] = 0; 1037 1038 /* 1039 * Walk down the list looking for a definition of the alias 1040 */ 1041 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1042 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1043 return(xmlCharEncodingAliases[i].name); 1044 } 1045 } 1046 return(NULL); 1047 } 1048 1049 /** 1050 * xmlAddEncodingAlias: 1051 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1052 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1053 * 1054 * Registers an alias @alias for an encoding named @name. Existing alias 1055 * will be overwritten. 1056 * 1057 * Returns 0 in case of success, -1 in case of error 1058 */ 1059 int 1060 xmlAddEncodingAlias(const char *name, const char *alias) { 1061 int i; 1062 char upper[100]; 1063 1064 if ((name == NULL) || (alias == NULL)) 1065 return(-1); 1066 1067 for (i = 0;i < 99;i++) { 1068 upper[i] = toupper(alias[i]); 1069 if (upper[i] == 0) break; 1070 } 1071 upper[i] = 0; 1072 1073 if (xmlCharEncodingAliases == NULL) { 1074 xmlCharEncodingAliasesNb = 0; 1075 xmlCharEncodingAliasesMax = 20; 1076 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1077 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1078 if (xmlCharEncodingAliases == NULL) 1079 return(-1); 1080 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) { 1081 xmlCharEncodingAliasesMax *= 2; 1082 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 1083 xmlRealloc(xmlCharEncodingAliases, 1084 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias)); 1085 } 1086 /* 1087 * Walk down the list looking for a definition of the alias 1088 */ 1089 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1090 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) { 1091 /* 1092 * Replace the definition. 1093 */ 1094 xmlFree((char *) xmlCharEncodingAliases[i].name); 1095 xmlCharEncodingAliases[i].name = xmlMemStrdup(name); 1096 return(0); 1097 } 1098 } 1099 /* 1100 * Add the definition 1101 */ 1102 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name); 1103 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper); 1104 xmlCharEncodingAliasesNb++; 1105 return(0); 1106 } 1107 1108 /** 1109 * xmlDelEncodingAlias: 1110 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually) 1111 * 1112 * Unregisters an encoding alias @alias 1113 * 1114 * Returns 0 in case of success, -1 in case of error 1115 */ 1116 int 1117 xmlDelEncodingAlias(const char *alias) { 1118 int i; 1119 1120 if (alias == NULL) 1121 return(-1); 1122 1123 if (xmlCharEncodingAliases == NULL) 1124 return(-1); 1125 /* 1126 * Walk down the list looking for a definition of the alias 1127 */ 1128 for (i = 0;i < xmlCharEncodingAliasesNb;i++) { 1129 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) { 1130 xmlFree((char *) xmlCharEncodingAliases[i].name); 1131 xmlFree((char *) xmlCharEncodingAliases[i].alias); 1132 xmlCharEncodingAliasesNb--; 1133 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1], 1134 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i)); 1135 return(0); 1136 } 1137 } 1138 return(-1); 1139 } 1140 1141 /** 1142 * xmlParseCharEncoding: 1143 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually) 1144 * 1145 * Compare the string to the encoding schemes already known. Note 1146 * that the comparison is case insensitive accordingly to the section 1147 * [XML] 4.3.3 Character Encoding in Entities. 1148 * 1149 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE 1150 * if not recognized. 1151 */ 1152 xmlCharEncoding 1153 xmlParseCharEncoding(const char* name) 1154 { 1155 const char *alias; 1156 char upper[500]; 1157 int i; 1158 1159 if (name == NULL) 1160 return(XML_CHAR_ENCODING_NONE); 1161 1162 /* 1163 * Do the alias resolution 1164 */ 1165 alias = xmlGetEncodingAlias(name); 1166 if (alias != NULL) 1167 name = alias; 1168 1169 for (i = 0;i < 499;i++) { 1170 upper[i] = toupper(name[i]); 1171 if (upper[i] == 0) break; 1172 } 1173 upper[i] = 0; 1174 1175 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE); 1176 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8); 1177 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8); 1178 1179 /* 1180 * NOTE: if we were able to parse this, the endianness of UTF16 is 1181 * already found and in use 1182 */ 1183 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE); 1184 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE); 1185 1186 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1187 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2); 1188 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2); 1189 1190 /* 1191 * NOTE: if we were able to parse this, the endianness of UCS4 is 1192 * already found and in use 1193 */ 1194 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1195 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE); 1196 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE); 1197 1198 1199 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1); 1200 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1); 1201 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1); 1202 1203 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2); 1204 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2); 1205 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2); 1206 1207 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3); 1208 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4); 1209 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5); 1210 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6); 1211 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7); 1212 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8); 1213 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); 1214 1215 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); 1216 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); 1217 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); 1218 1219 #ifdef DEBUG_ENCODING 1220 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name); 1221 #endif 1222 return(XML_CHAR_ENCODING_ERROR); 1223 } 1224 1225 /** 1226 * xmlGetCharEncodingName: 1227 * @enc: the encoding 1228 * 1229 * The "canonical" name for XML encoding. 1230 * C.f. http://www.w3.org/TR/REC-xml#charencoding 1231 * Section 4.3.3 Character Encoding in Entities 1232 * 1233 * Returns the canonical name for the given encoding 1234 */ 1235 1236 const char* 1237 xmlGetCharEncodingName(xmlCharEncoding enc) { 1238 switch (enc) { 1239 case XML_CHAR_ENCODING_ERROR: 1240 return(NULL); 1241 case XML_CHAR_ENCODING_NONE: 1242 return(NULL); 1243 case XML_CHAR_ENCODING_UTF8: 1244 return("UTF-8"); 1245 case XML_CHAR_ENCODING_UTF16LE: 1246 return("UTF-16"); 1247 case XML_CHAR_ENCODING_UTF16BE: 1248 return("UTF-16"); 1249 case XML_CHAR_ENCODING_EBCDIC: 1250 return("EBCDIC"); 1251 case XML_CHAR_ENCODING_UCS4LE: 1252 return("ISO-10646-UCS-4"); 1253 case XML_CHAR_ENCODING_UCS4BE: 1254 return("ISO-10646-UCS-4"); 1255 case XML_CHAR_ENCODING_UCS4_2143: 1256 return("ISO-10646-UCS-4"); 1257 case XML_CHAR_ENCODING_UCS4_3412: 1258 return("ISO-10646-UCS-4"); 1259 case XML_CHAR_ENCODING_UCS2: 1260 return("ISO-10646-UCS-2"); 1261 case XML_CHAR_ENCODING_8859_1: 1262 return("ISO-8859-1"); 1263 case XML_CHAR_ENCODING_8859_2: 1264 return("ISO-8859-2"); 1265 case XML_CHAR_ENCODING_8859_3: 1266 return("ISO-8859-3"); 1267 case XML_CHAR_ENCODING_8859_4: 1268 return("ISO-8859-4"); 1269 case XML_CHAR_ENCODING_8859_5: 1270 return("ISO-8859-5"); 1271 case XML_CHAR_ENCODING_8859_6: 1272 return("ISO-8859-6"); 1273 case XML_CHAR_ENCODING_8859_7: 1274 return("ISO-8859-7"); 1275 case XML_CHAR_ENCODING_8859_8: 1276 return("ISO-8859-8"); 1277 case XML_CHAR_ENCODING_8859_9: 1278 return("ISO-8859-9"); 1279 case XML_CHAR_ENCODING_2022_JP: 1280 return("ISO-2022-JP"); 1281 case XML_CHAR_ENCODING_SHIFT_JIS: 1282 return("Shift-JIS"); 1283 case XML_CHAR_ENCODING_EUC_JP: 1284 return("EUC-JP"); 1285 case XML_CHAR_ENCODING_ASCII: 1286 return(NULL); 1287 } 1288 return(NULL); 1289 } 1290 1291 /************************************************************************ 1292 * * 1293 * Char encoding handlers * 1294 * * 1295 ************************************************************************/ 1296 1297 1298 /* the size should be growable, but it's not a big deal ... */ 1299 #define MAX_ENCODING_HANDLERS 50 1300 static xmlCharEncodingHandlerPtr *handlers = NULL; 1301 static int nbCharEncodingHandler = 0; 1302 1303 /* 1304 * The default is UTF-8 for XML, that's also the default used for the 1305 * parser internals, so the default encoding handler is NULL 1306 */ 1307 1308 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL; 1309 1310 /** 1311 * xmlNewCharEncodingHandler: 1312 * @name: the encoding name, in UTF-8 format (ASCII actually) 1313 * @input: the xmlCharEncodingInputFunc to read that encoding 1314 * @output: the xmlCharEncodingOutputFunc to write that encoding 1315 * 1316 * Create and registers an xmlCharEncodingHandler. 1317 * 1318 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error). 1319 */ 1320 xmlCharEncodingHandlerPtr 1321 xmlNewCharEncodingHandler(const char *name, 1322 xmlCharEncodingInputFunc input, 1323 xmlCharEncodingOutputFunc output) { 1324 xmlCharEncodingHandlerPtr handler; 1325 const char *alias; 1326 char upper[500]; 1327 int i; 1328 char *up = NULL; 1329 1330 /* 1331 * Do the alias resolution 1332 */ 1333 alias = xmlGetEncodingAlias(name); 1334 if (alias != NULL) 1335 name = alias; 1336 1337 /* 1338 * Keep only the uppercase version of the encoding. 1339 */ 1340 if (name == NULL) { 1341 xmlEncodingErr(XML_I18N_NO_NAME, 1342 "xmlNewCharEncodingHandler : no name !\n", NULL); 1343 return(NULL); 1344 } 1345 for (i = 0;i < 499;i++) { 1346 upper[i] = toupper(name[i]); 1347 if (upper[i] == 0) break; 1348 } 1349 upper[i] = 0; 1350 up = xmlMemStrdup(upper); 1351 if (up == NULL) { 1352 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1353 return(NULL); 1354 } 1355 1356 /* 1357 * allocate and fill-up an handler block. 1358 */ 1359 handler = (xmlCharEncodingHandlerPtr) 1360 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1361 if (handler == NULL) { 1362 xmlFree(up); 1363 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n"); 1364 return(NULL); 1365 } 1366 memset(handler, 0, sizeof(xmlCharEncodingHandler)); 1367 handler->input = input; 1368 handler->output = output; 1369 handler->name = up; 1370 1371 #ifdef LIBXML_ICONV_ENABLED 1372 handler->iconv_in = NULL; 1373 handler->iconv_out = NULL; 1374 #endif 1375 #ifdef LIBXML_ICU_ENABLED 1376 handler->uconv_in = NULL; 1377 handler->uconv_out = NULL; 1378 #endif 1379 1380 /* 1381 * registers and returns the handler. 1382 */ 1383 xmlRegisterCharEncodingHandler(handler); 1384 #ifdef DEBUG_ENCODING 1385 xmlGenericError(xmlGenericErrorContext, 1386 "Registered encoding handler for %s\n", name); 1387 #endif 1388 return(handler); 1389 } 1390 1391 /** 1392 * xmlInitCharEncodingHandlers: 1393 * 1394 * Initialize the char encoding support, it registers the default 1395 * encoding supported. 1396 * NOTE: while public, this function usually doesn't need to be called 1397 * in normal processing. 1398 */ 1399 void 1400 xmlInitCharEncodingHandlers(void) { 1401 unsigned short int tst = 0x1234; 1402 unsigned char *ptr = (unsigned char *) &tst; 1403 1404 if (handlers != NULL) return; 1405 1406 handlers = (xmlCharEncodingHandlerPtr *) 1407 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr)); 1408 1409 if (*ptr == 0x12) xmlLittleEndian = 0; 1410 else if (*ptr == 0x34) xmlLittleEndian = 1; 1411 else { 1412 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1413 "Odd problem at endianness detection\n", NULL); 1414 } 1415 1416 if (handlers == NULL) { 1417 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n"); 1418 return; 1419 } 1420 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8); 1421 #ifdef LIBXML_OUTPUT_ENABLED 1422 xmlUTF16LEHandler = 1423 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); 1424 xmlUTF16BEHandler = 1425 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); 1426 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16); 1427 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); 1428 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); 1429 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii); 1430 #ifdef LIBXML_HTML_ENABLED 1431 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml); 1432 #endif 1433 #else 1434 xmlUTF16LEHandler = 1435 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL); 1436 xmlUTF16BEHandler = 1437 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL); 1438 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL); 1439 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL); 1440 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL); 1441 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL); 1442 #endif /* LIBXML_OUTPUT_ENABLED */ 1443 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 1444 #ifdef LIBXML_ISO8859X_ENABLED 1445 xmlRegisterCharEncodingHandlersISO8859x (); 1446 #endif 1447 #endif 1448 1449 } 1450 1451 /** 1452 * xmlCleanupCharEncodingHandlers: 1453 * 1454 * Cleanup the memory allocated for the char encoding support, it 1455 * unregisters all the encoding handlers and the aliases. 1456 */ 1457 void 1458 xmlCleanupCharEncodingHandlers(void) { 1459 xmlCleanupEncodingAliases(); 1460 1461 if (handlers == NULL) return; 1462 1463 for (;nbCharEncodingHandler > 0;) { 1464 nbCharEncodingHandler--; 1465 if (handlers[nbCharEncodingHandler] != NULL) { 1466 if (handlers[nbCharEncodingHandler]->name != NULL) 1467 xmlFree(handlers[nbCharEncodingHandler]->name); 1468 xmlFree(handlers[nbCharEncodingHandler]); 1469 } 1470 } 1471 xmlFree(handlers); 1472 handlers = NULL; 1473 nbCharEncodingHandler = 0; 1474 xmlDefaultCharEncodingHandler = NULL; 1475 } 1476 1477 /** 1478 * xmlRegisterCharEncodingHandler: 1479 * @handler: the xmlCharEncodingHandlerPtr handler block 1480 * 1481 * Register the char encoding handler, surprising, isn't it ? 1482 */ 1483 void 1484 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { 1485 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1486 if ((handler == NULL) || (handlers == NULL)) { 1487 xmlEncodingErr(XML_I18N_NO_HANDLER, 1488 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); 1489 return; 1490 } 1491 1492 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { 1493 xmlEncodingErr(XML_I18N_EXCESS_HANDLER, 1494 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", 1495 "MAX_ENCODING_HANDLERS"); 1496 return; 1497 } 1498 handlers[nbCharEncodingHandler++] = handler; 1499 } 1500 1501 /** 1502 * xmlGetCharEncodingHandler: 1503 * @enc: an xmlCharEncoding value. 1504 * 1505 * Search in the registered set the handler able to read/write that encoding. 1506 * 1507 * Returns the handler or NULL if not found 1508 */ 1509 xmlCharEncodingHandlerPtr 1510 xmlGetCharEncodingHandler(xmlCharEncoding enc) { 1511 xmlCharEncodingHandlerPtr handler; 1512 1513 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1514 switch (enc) { 1515 case XML_CHAR_ENCODING_ERROR: 1516 return(NULL); 1517 case XML_CHAR_ENCODING_NONE: 1518 return(NULL); 1519 case XML_CHAR_ENCODING_UTF8: 1520 return(NULL); 1521 case XML_CHAR_ENCODING_UTF16LE: 1522 return(xmlUTF16LEHandler); 1523 case XML_CHAR_ENCODING_UTF16BE: 1524 return(xmlUTF16BEHandler); 1525 case XML_CHAR_ENCODING_EBCDIC: 1526 handler = xmlFindCharEncodingHandler("EBCDIC"); 1527 if (handler != NULL) return(handler); 1528 handler = xmlFindCharEncodingHandler("ebcdic"); 1529 if (handler != NULL) return(handler); 1530 handler = xmlFindCharEncodingHandler("EBCDIC-US"); 1531 if (handler != NULL) return(handler); 1532 handler = xmlFindCharEncodingHandler("IBM-037"); 1533 if (handler != NULL) return(handler); 1534 break; 1535 case XML_CHAR_ENCODING_UCS4BE: 1536 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1537 if (handler != NULL) return(handler); 1538 handler = xmlFindCharEncodingHandler("UCS-4"); 1539 if (handler != NULL) return(handler); 1540 handler = xmlFindCharEncodingHandler("UCS4"); 1541 if (handler != NULL) return(handler); 1542 break; 1543 case XML_CHAR_ENCODING_UCS4LE: 1544 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); 1545 if (handler != NULL) return(handler); 1546 handler = xmlFindCharEncodingHandler("UCS-4"); 1547 if (handler != NULL) return(handler); 1548 handler = xmlFindCharEncodingHandler("UCS4"); 1549 if (handler != NULL) return(handler); 1550 break; 1551 case XML_CHAR_ENCODING_UCS4_2143: 1552 break; 1553 case XML_CHAR_ENCODING_UCS4_3412: 1554 break; 1555 case XML_CHAR_ENCODING_UCS2: 1556 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); 1557 if (handler != NULL) return(handler); 1558 handler = xmlFindCharEncodingHandler("UCS-2"); 1559 if (handler != NULL) return(handler); 1560 handler = xmlFindCharEncodingHandler("UCS2"); 1561 if (handler != NULL) return(handler); 1562 break; 1563 1564 /* 1565 * We used to keep ISO Latin encodings native in the 1566 * generated data. This led to so many problems that 1567 * this has been removed. One can still change this 1568 * back by registering no-ops encoders for those 1569 */ 1570 case XML_CHAR_ENCODING_8859_1: 1571 handler = xmlFindCharEncodingHandler("ISO-8859-1"); 1572 if (handler != NULL) return(handler); 1573 break; 1574 case XML_CHAR_ENCODING_8859_2: 1575 handler = xmlFindCharEncodingHandler("ISO-8859-2"); 1576 if (handler != NULL) return(handler); 1577 break; 1578 case XML_CHAR_ENCODING_8859_3: 1579 handler = xmlFindCharEncodingHandler("ISO-8859-3"); 1580 if (handler != NULL) return(handler); 1581 break; 1582 case XML_CHAR_ENCODING_8859_4: 1583 handler = xmlFindCharEncodingHandler("ISO-8859-4"); 1584 if (handler != NULL) return(handler); 1585 break; 1586 case XML_CHAR_ENCODING_8859_5: 1587 handler = xmlFindCharEncodingHandler("ISO-8859-5"); 1588 if (handler != NULL) return(handler); 1589 break; 1590 case XML_CHAR_ENCODING_8859_6: 1591 handler = xmlFindCharEncodingHandler("ISO-8859-6"); 1592 if (handler != NULL) return(handler); 1593 break; 1594 case XML_CHAR_ENCODING_8859_7: 1595 handler = xmlFindCharEncodingHandler("ISO-8859-7"); 1596 if (handler != NULL) return(handler); 1597 break; 1598 case XML_CHAR_ENCODING_8859_8: 1599 handler = xmlFindCharEncodingHandler("ISO-8859-8"); 1600 if (handler != NULL) return(handler); 1601 break; 1602 case XML_CHAR_ENCODING_8859_9: 1603 handler = xmlFindCharEncodingHandler("ISO-8859-9"); 1604 if (handler != NULL) return(handler); 1605 break; 1606 1607 1608 case XML_CHAR_ENCODING_2022_JP: 1609 handler = xmlFindCharEncodingHandler("ISO-2022-JP"); 1610 if (handler != NULL) return(handler); 1611 break; 1612 case XML_CHAR_ENCODING_SHIFT_JIS: 1613 handler = xmlFindCharEncodingHandler("SHIFT-JIS"); 1614 if (handler != NULL) return(handler); 1615 handler = xmlFindCharEncodingHandler("SHIFT_JIS"); 1616 if (handler != NULL) return(handler); 1617 handler = xmlFindCharEncodingHandler("Shift_JIS"); 1618 if (handler != NULL) return(handler); 1619 break; 1620 case XML_CHAR_ENCODING_EUC_JP: 1621 handler = xmlFindCharEncodingHandler("EUC-JP"); 1622 if (handler != NULL) return(handler); 1623 break; 1624 default: 1625 break; 1626 } 1627 1628 #ifdef DEBUG_ENCODING 1629 xmlGenericError(xmlGenericErrorContext, 1630 "No handler found for encoding %d\n", enc); 1631 #endif 1632 return(NULL); 1633 } 1634 1635 /** 1636 * xmlFindCharEncodingHandler: 1637 * @name: a string describing the char encoding. 1638 * 1639 * Search in the registered set the handler able to read/write that encoding. 1640 * 1641 * Returns the handler or NULL if not found 1642 */ 1643 xmlCharEncodingHandlerPtr 1644 xmlFindCharEncodingHandler(const char *name) { 1645 const char *nalias; 1646 const char *norig; 1647 xmlCharEncoding alias; 1648 #ifdef LIBXML_ICONV_ENABLED 1649 xmlCharEncodingHandlerPtr enc; 1650 iconv_t icv_in, icv_out; 1651 #endif /* LIBXML_ICONV_ENABLED */ 1652 #ifdef LIBXML_ICU_ENABLED 1653 xmlCharEncodingHandlerPtr encu; 1654 uconv_t *ucv_in, *ucv_out; 1655 #endif /* LIBXML_ICU_ENABLED */ 1656 char upper[100]; 1657 int i; 1658 1659 if (handlers == NULL) xmlInitCharEncodingHandlers(); 1660 if (name == NULL) return(xmlDefaultCharEncodingHandler); 1661 if (name[0] == 0) return(xmlDefaultCharEncodingHandler); 1662 1663 /* 1664 * Do the alias resolution 1665 */ 1666 norig = name; 1667 nalias = xmlGetEncodingAlias(name); 1668 if (nalias != NULL) 1669 name = nalias; 1670 1671 /* 1672 * Check first for directly registered encoding names 1673 */ 1674 for (i = 0;i < 99;i++) { 1675 upper[i] = toupper(name[i]); 1676 if (upper[i] == 0) break; 1677 } 1678 upper[i] = 0; 1679 1680 if (handlers != NULL) { 1681 for (i = 0;i < nbCharEncodingHandler; i++) { 1682 if (!strcmp(upper, handlers[i]->name)) { 1683 #ifdef DEBUG_ENCODING 1684 xmlGenericError(xmlGenericErrorContext, 1685 "Found registered handler for encoding %s\n", name); 1686 #endif 1687 return(handlers[i]); 1688 } 1689 } 1690 } 1691 1692 #ifdef LIBXML_ICONV_ENABLED 1693 /* check whether iconv can handle this */ 1694 icv_in = iconv_open("UTF-8", name); 1695 icv_out = iconv_open(name, "UTF-8"); 1696 if (icv_in == (iconv_t) -1) { 1697 icv_in = iconv_open("UTF-8", upper); 1698 } 1699 if (icv_out == (iconv_t) -1) { 1700 icv_out = iconv_open(upper, "UTF-8"); 1701 } 1702 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { 1703 enc = (xmlCharEncodingHandlerPtr) 1704 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1705 if (enc == NULL) { 1706 iconv_close(icv_in); 1707 iconv_close(icv_out); 1708 return(NULL); 1709 } 1710 memset(enc, 0, sizeof(xmlCharEncodingHandler)); 1711 enc->name = xmlMemStrdup(name); 1712 enc->input = NULL; 1713 enc->output = NULL; 1714 enc->iconv_in = icv_in; 1715 enc->iconv_out = icv_out; 1716 #ifdef DEBUG_ENCODING 1717 xmlGenericError(xmlGenericErrorContext, 1718 "Found iconv handler for encoding %s\n", name); 1719 #endif 1720 return enc; 1721 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { 1722 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1723 "iconv : problems with filters for '%s'\n", name); 1724 } 1725 #endif /* LIBXML_ICONV_ENABLED */ 1726 #ifdef LIBXML_ICU_ENABLED 1727 /* check whether icu can handle this */ 1728 ucv_in = openIcuConverter(name, 1); 1729 ucv_out = openIcuConverter(name, 0); 1730 if (ucv_in != NULL && ucv_out != NULL) { 1731 encu = (xmlCharEncodingHandlerPtr) 1732 xmlMalloc(sizeof(xmlCharEncodingHandler)); 1733 if (encu == NULL) { 1734 closeIcuConverter(ucv_in); 1735 closeIcuConverter(ucv_out); 1736 return(NULL); 1737 } 1738 memset(encu, 0, sizeof(xmlCharEncodingHandler)); 1739 encu->name = xmlMemStrdup(name); 1740 encu->input = NULL; 1741 encu->output = NULL; 1742 encu->uconv_in = ucv_in; 1743 encu->uconv_out = ucv_out; 1744 #ifdef DEBUG_ENCODING 1745 xmlGenericError(xmlGenericErrorContext, 1746 "Found ICU converter handler for encoding %s\n", name); 1747 #endif 1748 return encu; 1749 } else if (ucv_in != NULL || ucv_out != NULL) { 1750 closeIcuConverter(ucv_in); 1751 closeIcuConverter(ucv_out); 1752 xmlEncodingErr(XML_ERR_INTERNAL_ERROR, 1753 "ICU converter : problems with filters for '%s'\n", name); 1754 } 1755 #endif /* LIBXML_ICU_ENABLED */ 1756 1757 #ifdef DEBUG_ENCODING 1758 xmlGenericError(xmlGenericErrorContext, 1759 "No handler found for encoding %s\n", name); 1760 #endif 1761 1762 /* 1763 * Fallback using the canonical names 1764 */ 1765 alias = xmlParseCharEncoding(norig); 1766 if (alias != XML_CHAR_ENCODING_ERROR) { 1767 const char* canon; 1768 canon = xmlGetCharEncodingName(alias); 1769 if ((canon != NULL) && (strcmp(name, canon))) { 1770 return(xmlFindCharEncodingHandler(canon)); 1771 } 1772 } 1773 1774 /* If "none of the above", give up */ 1775 return(NULL); 1776 } 1777 1778 /************************************************************************ 1779 * * 1780 * ICONV based generic conversion functions * 1781 * * 1782 ************************************************************************/ 1783 1784 #ifdef LIBXML_ICONV_ENABLED 1785 /** 1786 * xmlIconvWrapper: 1787 * @cd: iconv converter data structure 1788 * @out: a pointer to an array of bytes to store the result 1789 * @outlen: the length of @out 1790 * @in: a pointer to an array of ISO Latin 1 chars 1791 * @inlen: the length of @in 1792 * 1793 * Returns 0 if success, or 1794 * -1 by lack of space, or 1795 * -2 if the transcoding fails (for *in is not valid utf8 string or 1796 * the result of transformation can't fit into the encoding we want), or 1797 * -3 if there the last byte can't form a single output char. 1798 * 1799 * The value of @inlen after return is the number of octets consumed 1800 * as the return value is positive, else unpredictable. 1801 * The value of @outlen after return is the number of ocetes consumed. 1802 */ 1803 static int 1804 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, 1805 const unsigned char *in, int *inlen) { 1806 size_t icv_inlen, icv_outlen; 1807 const char *icv_in = (const char *) in; 1808 char *icv_out = (char *) out; 1809 int ret; 1810 1811 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1812 if (outlen != NULL) *outlen = 0; 1813 return(-1); 1814 } 1815 icv_inlen = *inlen; 1816 icv_outlen = *outlen; 1817 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); 1818 *inlen -= icv_inlen; 1819 *outlen -= icv_outlen; 1820 if ((icv_inlen != 0) || (ret == -1)) { 1821 #ifdef EILSEQ 1822 if (errno == EILSEQ) { 1823 return -2; 1824 } else 1825 #endif 1826 #ifdef E2BIG 1827 if (errno == E2BIG) { 1828 return -1; 1829 } else 1830 #endif 1831 #ifdef EINVAL 1832 if (errno == EINVAL) { 1833 return -3; 1834 } else 1835 #endif 1836 { 1837 return -3; 1838 } 1839 } 1840 return 0; 1841 } 1842 #endif /* LIBXML_ICONV_ENABLED */ 1843 1844 /************************************************************************ 1845 * * 1846 * ICU based generic conversion functions * 1847 * * 1848 ************************************************************************/ 1849 1850 #ifdef LIBXML_ICU_ENABLED 1851 /** 1852 * xmlUconvWrapper: 1853 * @cd: ICU uconverter data structure 1854 * @toUnicode : non-zero if toUnicode. 0 otherwise. 1855 * @out: a pointer to an array of bytes to store the result 1856 * @outlen: the length of @out 1857 * @in: a pointer to an array of ISO Latin 1 chars 1858 * @inlen: the length of @in 1859 * 1860 * Returns 0 if success, or 1861 * -1 by lack of space, or 1862 * -2 if the transcoding fails (for *in is not valid utf8 string or 1863 * the result of transformation can't fit into the encoding we want), or 1864 * -3 if there the last byte can't form a single output char. 1865 * 1866 * The value of @inlen after return is the number of octets consumed 1867 * as the return value is positive, else unpredictable. 1868 * The value of @outlen after return is the number of ocetes consumed. 1869 */ 1870 static int 1871 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, 1872 const unsigned char *in, int *inlen) { 1873 const char *ucv_in = (const char *) in; 1874 char *ucv_out = (char *) out; 1875 UErrorCode err = U_ZERO_ERROR; 1876 1877 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { 1878 if (outlen != NULL) *outlen = 0; 1879 return(-1); 1880 } 1881 1882 /* 1883 * TODO(jungshik) 1884 * 1. is ucnv_convert(To|From)Algorithmic better? 1885 * 2. had we better use an explicit pivot buffer? 1886 * 3. error returned comes from 'fromUnicode' only even 1887 * when toUnicode is true ! 1888 */ 1889 if (toUnicode) { 1890 /* encoding => UTF-16 => UTF-8 */ 1891 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen, 1892 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1893 0, TRUE, &err); 1894 } else { 1895 /* UTF-8 => UTF-16 => encoding */ 1896 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen, 1897 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL, 1898 0, TRUE, &err); 1899 } 1900 *inlen = ucv_in - (const char*) in; 1901 *outlen = ucv_out - (char *) out; 1902 if (U_SUCCESS(err)) 1903 return 0; 1904 if (err == U_BUFFER_OVERFLOW_ERROR) 1905 return -1; 1906 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND) 1907 return -2; 1908 /* if (err == U_TRUNCATED_CHAR_FOUND) */ 1909 return -3; 1910 } 1911 #endif /* LIBXML_ICU_ENABLED */ 1912 1913 /************************************************************************ 1914 * * 1915 * The real API used by libxml for on-the-fly conversion * 1916 * * 1917 ************************************************************************/ 1918 1919 /** 1920 * xmlCharEncFirstLineInt: 1921 * @handler: char enconding transformation data structure 1922 * @out: an xmlBuffer for the output. 1923 * @in: an xmlBuffer for the input 1924 * @len: number of bytes to convert for the first line, or -1 1925 * 1926 * Front-end for the encoding handler input function, but handle only 1927 * the very first line, i.e. limit itself to 45 chars. 1928 * 1929 * Returns the number of byte written if success, or 1930 * -1 general error 1931 * -2 if the transcoding fails (for *in is not valid utf8 string or 1932 * the result of transformation can't fit into the encoding we want), or 1933 */ 1934 int 1935 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 1936 xmlBufferPtr in, int len) { 1937 int ret = -2; 1938 int written; 1939 int toconv; 1940 1941 if (handler == NULL) return(-1); 1942 if (out == NULL) return(-1); 1943 if (in == NULL) return(-1); 1944 1945 /* calculate space available */ 1946 written = out->size - out->use - 1; /* count '\0' */ 1947 toconv = in->use; 1948 /* 1949 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 1950 * 45 chars should be sufficient to reach the end of the encoding 1951 * declaration without going too far inside the document content. 1952 * on UTF-16 this means 90bytes, on UCS4 this means 180 1953 * The actual value depending on guessed encoding is passed as @len 1954 * if provided 1955 */ 1956 if (len >= 0) { 1957 if (toconv > len) 1958 toconv = len; 1959 } else { 1960 if (toconv > 180) 1961 toconv = 180; 1962 } 1963 if (toconv * 2 >= written) { 1964 xmlBufferGrow(out, toconv * 2); 1965 written = out->size - out->use - 1; 1966 } 1967 1968 if (handler->input != NULL) { 1969 ret = handler->input(&out->content[out->use], &written, 1970 in->content, &toconv); 1971 xmlBufferShrink(in, toconv); 1972 out->use += written; 1973 out->content[out->use] = 0; 1974 } 1975 #ifdef LIBXML_ICONV_ENABLED 1976 else if (handler->iconv_in != NULL) { 1977 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 1978 &written, in->content, &toconv); 1979 xmlBufferShrink(in, toconv); 1980 out->use += written; 1981 out->content[out->use] = 0; 1982 if (ret == -1) ret = -3; 1983 } 1984 #endif /* LIBXML_ICONV_ENABLED */ 1985 #ifdef LIBXML_ICU_ENABLED 1986 else if (handler->uconv_in != NULL) { 1987 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 1988 &written, in->content, &toconv); 1989 xmlBufferShrink(in, toconv); 1990 out->use += written; 1991 out->content[out->use] = 0; 1992 if (ret == -1) ret = -3; 1993 } 1994 #endif /* LIBXML_ICU_ENABLED */ 1995 #ifdef DEBUG_ENCODING 1996 switch (ret) { 1997 case 0: 1998 xmlGenericError(xmlGenericErrorContext, 1999 "converted %d bytes to %d bytes of input\n", 2000 toconv, written); 2001 break; 2002 case -1: 2003 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2004 toconv, written, in->use); 2005 break; 2006 case -2: 2007 xmlGenericError(xmlGenericErrorContext, 2008 "input conversion failed due to input error\n"); 2009 break; 2010 case -3: 2011 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n", 2012 toconv, written, in->use); 2013 break; 2014 default: 2015 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret); 2016 } 2017 #endif /* DEBUG_ENCODING */ 2018 /* 2019 * Ignore when input buffer is not on a boundary 2020 */ 2021 if (ret == -3) ret = 0; 2022 if (ret == -1) ret = 0; 2023 return(ret); 2024 } 2025 2026 /** 2027 * xmlCharEncFirstLine: 2028 * @handler: char enconding transformation data structure 2029 * @out: an xmlBuffer for the output. 2030 * @in: an xmlBuffer for the input 2031 * 2032 * Front-end for the encoding handler input function, but handle only 2033 * the very first line, i.e. limit itself to 45 chars. 2034 * 2035 * Returns the number of byte written if success, or 2036 * -1 general error 2037 * -2 if the transcoding fails (for *in is not valid utf8 string or 2038 * the result of transformation can't fit into the encoding we want), or 2039 */ 2040 int 2041 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2042 xmlBufferPtr in) { 2043 return(xmlCharEncFirstLineInt(handler, out, in, -1)); 2044 } 2045 2046 /** 2047 * xmlCharEncFirstLineInput: 2048 * @input: a parser input buffer 2049 * @len: number of bytes to convert for the first line, or -1 2050 * 2051 * Front-end for the encoding handler input function, but handle only 2052 * the very first line. Point is that this is based on autodetection 2053 * of the encoding and once that first line is converted we may find 2054 * out that a different decoder is needed to process the input. 2055 * 2056 * Returns the number of byte written if success, or 2057 * -1 general error 2058 * -2 if the transcoding fails (for *in is not valid utf8 string or 2059 * the result of transformation can't fit into the encoding we want), or 2060 */ 2061 int 2062 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) 2063 { 2064 int ret = -2; 2065 size_t written; 2066 size_t toconv; 2067 int c_in; 2068 int c_out; 2069 xmlBufPtr in; 2070 xmlBufPtr out; 2071 2072 if ((input == NULL) || (input->encoder == NULL) || 2073 (input->buffer == NULL) || (input->raw == NULL)) 2074 return (-1); 2075 out = input->buffer; 2076 in = input->raw; 2077 2078 toconv = xmlBufUse(in); 2079 if (toconv == 0) 2080 return (0); 2081 written = xmlBufAvail(out) - 1; /* count '\0' */ 2082 /* 2083 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38 2084 * 45 chars should be sufficient to reach the end of the encoding 2085 * declaration without going too far inside the document content. 2086 * on UTF-16 this means 90bytes, on UCS4 this means 180 2087 * The actual value depending on guessed encoding is passed as @len 2088 * if provided 2089 */ 2090 if (len >= 0) { 2091 if (toconv > (unsigned int) len) 2092 toconv = len; 2093 } else { 2094 if (toconv > 180) 2095 toconv = 180; 2096 } 2097 if (toconv * 2 >= written) { 2098 xmlBufGrow(out, toconv * 2); 2099 written = xmlBufAvail(out) - 1; 2100 } 2101 if (written > 360) 2102 written = 360; 2103 2104 c_in = toconv; 2105 c_out = written; 2106 if (input->encoder->input != NULL) { 2107 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2108 xmlBufContent(in), &c_in); 2109 xmlBufShrink(in, c_in); 2110 xmlBufAddLen(out, c_out); 2111 } 2112 #ifdef LIBXML_ICONV_ENABLED 2113 else if (input->encoder->iconv_in != NULL) { 2114 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2115 &c_out, xmlBufContent(in), &c_in); 2116 xmlBufShrink(in, c_in); 2117 xmlBufAddLen(out, c_out); 2118 if (ret == -1) 2119 ret = -3; 2120 } 2121 #endif /* LIBXML_ICONV_ENABLED */ 2122 #ifdef LIBXML_ICU_ENABLED 2123 else if (input->encoder->uconv_in != NULL) { 2124 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2125 &c_out, xmlBufContent(in), &c_in); 2126 xmlBufShrink(in, c_in); 2127 xmlBufAddLen(out, c_out); 2128 if (ret == -1) 2129 ret = -3; 2130 } 2131 #endif /* LIBXML_ICU_ENABLED */ 2132 switch (ret) { 2133 case 0: 2134 #ifdef DEBUG_ENCODING 2135 xmlGenericError(xmlGenericErrorContext, 2136 "converted %d bytes to %d bytes of input\n", 2137 c_in, c_out); 2138 #endif 2139 break; 2140 case -1: 2141 #ifdef DEBUG_ENCODING 2142 xmlGenericError(xmlGenericErrorContext, 2143 "converted %d bytes to %d bytes of input, %d left\n", 2144 c_in, c_out, (int)xmlBufUse(in)); 2145 #endif 2146 break; 2147 case -3: 2148 #ifdef DEBUG_ENCODING 2149 xmlGenericError(xmlGenericErrorContext, 2150 "converted %d bytes to %d bytes of input, %d left\n", 2151 c_in, c_out, (int)xmlBufUse(in)); 2152 #endif 2153 break; 2154 case -2: { 2155 char buf[50]; 2156 const xmlChar *content = xmlBufContent(in); 2157 2158 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2159 content[0], content[1], 2160 content[2], content[3]); 2161 buf[49] = 0; 2162 xmlEncodingErr(XML_I18N_CONV_FAILED, 2163 "input conversion failed due to input error, bytes %s\n", 2164 buf); 2165 } 2166 } 2167 /* 2168 * Ignore when input buffer is not on a boundary 2169 */ 2170 if (ret == -3) ret = 0; 2171 if (ret == -1) ret = 0; 2172 return(ret); 2173 } 2174 2175 /** 2176 * xmlCharEncInput: 2177 * @input: a parser input buffer 2178 * @flush: try to flush all the raw buffer 2179 * 2180 * Generic front-end for the encoding handler on parser input 2181 * 2182 * Returns the number of byte written if success, or 2183 * -1 general error 2184 * -2 if the transcoding fails (for *in is not valid utf8 string or 2185 * the result of transformation can't fit into the encoding we want), or 2186 */ 2187 int 2188 xmlCharEncInput(xmlParserInputBufferPtr input, int flush) 2189 { 2190 int ret = -2; 2191 size_t written; 2192 size_t toconv; 2193 int c_in; 2194 int c_out; 2195 xmlBufPtr in; 2196 xmlBufPtr out; 2197 2198 if ((input == NULL) || (input->encoder == NULL) || 2199 (input->buffer == NULL) || (input->raw == NULL)) 2200 return (-1); 2201 out = input->buffer; 2202 in = input->raw; 2203 2204 toconv = xmlBufUse(in); 2205 if (toconv == 0) 2206 return (0); 2207 if ((toconv > 64 * 1024) && (flush == 0)) 2208 toconv = 64 * 1024; 2209 written = xmlBufAvail(out); 2210 if (written > 0) 2211 written--; /* count '\0' */ 2212 if (toconv * 2 >= written) { 2213 xmlBufGrow(out, toconv * 2); 2214 written = xmlBufAvail(out); 2215 if (written > 0) 2216 written--; /* count '\0' */ 2217 } 2218 if ((written > 128 * 1024) && (flush == 0)) 2219 written = 128 * 1024; 2220 2221 c_in = toconv; 2222 c_out = written; 2223 if (input->encoder->input != NULL) { 2224 ret = input->encoder->input(xmlBufEnd(out), &c_out, 2225 xmlBufContent(in), &c_in); 2226 xmlBufShrink(in, c_in); 2227 xmlBufAddLen(out, c_out); 2228 } 2229 #ifdef LIBXML_ICONV_ENABLED 2230 else if (input->encoder->iconv_in != NULL) { 2231 ret = xmlIconvWrapper(input->encoder->iconv_in, xmlBufEnd(out), 2232 &c_out, xmlBufContent(in), &c_in); 2233 xmlBufShrink(in, c_in); 2234 xmlBufAddLen(out, c_out); 2235 if (ret == -1) 2236 ret = -3; 2237 } 2238 #endif /* LIBXML_ICONV_ENABLED */ 2239 #ifdef LIBXML_ICU_ENABLED 2240 else if (input->encoder->uconv_in != NULL) { 2241 ret = xmlUconvWrapper(input->encoder->uconv_in, 1, xmlBufEnd(out), 2242 &c_out, xmlBufContent(in), &c_in); 2243 xmlBufShrink(in, c_in); 2244 xmlBufAddLen(out, c_out); 2245 if (ret == -1) 2246 ret = -3; 2247 } 2248 #endif /* LIBXML_ICU_ENABLED */ 2249 switch (ret) { 2250 case 0: 2251 #ifdef DEBUG_ENCODING 2252 xmlGenericError(xmlGenericErrorContext, 2253 "converted %d bytes to %d bytes of input\n", 2254 c_in, c_out); 2255 #endif 2256 break; 2257 case -1: 2258 #ifdef DEBUG_ENCODING 2259 xmlGenericError(xmlGenericErrorContext, 2260 "converted %d bytes to %d bytes of input, %d left\n", 2261 c_in, c_out, (int)xmlBufUse(in)); 2262 #endif 2263 break; 2264 case -3: 2265 #ifdef DEBUG_ENCODING 2266 xmlGenericError(xmlGenericErrorContext, 2267 "converted %d bytes to %d bytes of input, %d left\n", 2268 c_in, c_out, (int)xmlBufUse(in)); 2269 #endif 2270 break; 2271 case -2: { 2272 char buf[50]; 2273 const xmlChar *content = xmlBufContent(in); 2274 2275 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2276 content[0], content[1], 2277 content[2], content[3]); 2278 buf[49] = 0; 2279 xmlEncodingErr(XML_I18N_CONV_FAILED, 2280 "input conversion failed due to input error, bytes %s\n", 2281 buf); 2282 } 2283 } 2284 /* 2285 * Ignore when input buffer is not on a boundary 2286 */ 2287 if (ret == -3) 2288 ret = 0; 2289 return ((ret < 0) ? ret : c_out); 2290 } 2291 2292 /** 2293 * xmlCharEncInFunc: 2294 * @handler: char encoding transformation data structure 2295 * @out: an xmlBuffer for the output. 2296 * @in: an xmlBuffer for the input 2297 * 2298 * Generic front-end for the encoding handler input function 2299 * 2300 * Returns the number of byte written if success, or 2301 * -1 general error 2302 * -2 if the transcoding fails (for *in is not valid utf8 string or 2303 * the result of transformation can't fit into the encoding we want), or 2304 */ 2305 int 2306 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out, 2307 xmlBufferPtr in) 2308 { 2309 int ret = -2; 2310 int written; 2311 int toconv; 2312 2313 if (handler == NULL) 2314 return (-1); 2315 if (out == NULL) 2316 return (-1); 2317 if (in == NULL) 2318 return (-1); 2319 2320 toconv = in->use; 2321 if (toconv == 0) 2322 return (0); 2323 written = out->size - out->use -1; /* count '\0' */ 2324 if (toconv * 2 >= written) { 2325 xmlBufferGrow(out, out->size + toconv * 2); 2326 written = out->size - out->use - 1; 2327 } 2328 if (handler->input != NULL) { 2329 ret = handler->input(&out->content[out->use], &written, 2330 in->content, &toconv); 2331 xmlBufferShrink(in, toconv); 2332 out->use += written; 2333 out->content[out->use] = 0; 2334 } 2335 #ifdef LIBXML_ICONV_ENABLED 2336 else if (handler->iconv_in != NULL) { 2337 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use], 2338 &written, in->content, &toconv); 2339 xmlBufferShrink(in, toconv); 2340 out->use += written; 2341 out->content[out->use] = 0; 2342 if (ret == -1) 2343 ret = -3; 2344 } 2345 #endif /* LIBXML_ICONV_ENABLED */ 2346 #ifdef LIBXML_ICU_ENABLED 2347 else if (handler->uconv_in != NULL) { 2348 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use], 2349 &written, in->content, &toconv); 2350 xmlBufferShrink(in, toconv); 2351 out->use += written; 2352 out->content[out->use] = 0; 2353 if (ret == -1) 2354 ret = -3; 2355 } 2356 #endif /* LIBXML_ICU_ENABLED */ 2357 switch (ret) { 2358 case 0: 2359 #ifdef DEBUG_ENCODING 2360 xmlGenericError(xmlGenericErrorContext, 2361 "converted %d bytes to %d bytes of input\n", 2362 toconv, written); 2363 #endif 2364 break; 2365 case -1: 2366 #ifdef DEBUG_ENCODING 2367 xmlGenericError(xmlGenericErrorContext, 2368 "converted %d bytes to %d bytes of input, %d left\n", 2369 toconv, written, in->use); 2370 #endif 2371 break; 2372 case -3: 2373 #ifdef DEBUG_ENCODING 2374 xmlGenericError(xmlGenericErrorContext, 2375 "converted %d bytes to %d bytes of input, %d left\n", 2376 toconv, written, in->use); 2377 #endif 2378 break; 2379 case -2: { 2380 char buf[50]; 2381 2382 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2383 in->content[0], in->content[1], 2384 in->content[2], in->content[3]); 2385 buf[49] = 0; 2386 xmlEncodingErr(XML_I18N_CONV_FAILED, 2387 "input conversion failed due to input error, bytes %s\n", 2388 buf); 2389 } 2390 } 2391 /* 2392 * Ignore when input buffer is not on a boundary 2393 */ 2394 if (ret == -3) 2395 ret = 0; 2396 return (written? written : ret); 2397 } 2398 2399 #ifdef LIBXML_OUTPUT_ENABLED 2400 /** 2401 * xmlCharEncOutput: 2402 * @output: a parser output buffer 2403 * @init: is this an initialization call without data 2404 * 2405 * Generic front-end for the encoding handler on parser output 2406 * a first call with @init == 1 has to be made first to initiate the 2407 * output in case of non-stateless encoding needing to initiate their 2408 * state or the output (like the BOM in UTF16). 2409 * In case of UTF8 sequence conversion errors for the given encoder, 2410 * the content will be automatically remapped to a CharRef sequence. 2411 * 2412 * Returns the number of byte written if success, or 2413 * -1 general error 2414 * -2 if the transcoding fails (for *in is not valid utf8 string or 2415 * the result of transformation can't fit into the encoding we want), or 2416 */ 2417 int 2418 xmlCharEncOutput(xmlOutputBufferPtr output, int init) 2419 { 2420 int ret = -2; 2421 size_t written; 2422 int writtentot = 0; 2423 size_t toconv; 2424 int c_in; 2425 int c_out; 2426 xmlBufPtr in; 2427 xmlBufPtr out; 2428 int charref_len = 0; 2429 2430 if ((output == NULL) || (output->encoder == NULL) || 2431 (output->buffer == NULL) || (output->conv == NULL)) 2432 return (-1); 2433 out = output->conv; 2434 in = output->buffer; 2435 2436 retry: 2437 2438 written = xmlBufAvail(out); 2439 if (written > 0) 2440 written--; /* count '\0' */ 2441 2442 /* 2443 * First specific handling of the initialization call 2444 */ 2445 if (init) { 2446 c_in = 0; 2447 c_out = written; 2448 if (output->encoder->output != NULL) { 2449 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2450 NULL, &c_in); 2451 if (ret > 0) /* Gennady: check return value */ 2452 xmlBufAddLen(out, c_out); 2453 } 2454 #ifdef LIBXML_ICONV_ENABLED 2455 else if (output->encoder->iconv_out != NULL) { 2456 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2457 &c_out, NULL, &c_in); 2458 xmlBufAddLen(out, c_out); 2459 } 2460 #endif /* LIBXML_ICONV_ENABLED */ 2461 #ifdef LIBXML_ICU_ENABLED 2462 else if (output->encoder->uconv_out != NULL) { 2463 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2464 &c_out, NULL, &c_in); 2465 xmlBufAddLen(out, c_out); 2466 } 2467 #endif /* LIBXML_ICU_ENABLED */ 2468 #ifdef DEBUG_ENCODING 2469 xmlGenericError(xmlGenericErrorContext, 2470 "initialized encoder\n"); 2471 #endif 2472 return(c_out); 2473 } 2474 2475 /* 2476 * Conversion itself. 2477 */ 2478 toconv = xmlBufUse(in); 2479 if (toconv == 0) 2480 return (0); 2481 if (toconv > 64 * 1024) 2482 toconv = 64 * 1024; 2483 if (toconv * 4 >= written) { 2484 xmlBufGrow(out, toconv * 4); 2485 written = xmlBufAvail(out) - 1; 2486 } 2487 if (written > 256 * 1024) 2488 written = 256 * 1024; 2489 2490 c_in = toconv; 2491 c_out = written; 2492 if (output->encoder->output != NULL) { 2493 ret = output->encoder->output(xmlBufEnd(out), &c_out, 2494 xmlBufContent(in), &c_in); 2495 if (c_out > 0) { 2496 xmlBufShrink(in, c_in); 2497 xmlBufAddLen(out, c_out); 2498 writtentot += c_out; 2499 } 2500 } 2501 #ifdef LIBXML_ICONV_ENABLED 2502 else if (output->encoder->iconv_out != NULL) { 2503 ret = xmlIconvWrapper(output->encoder->iconv_out, xmlBufEnd(out), 2504 &c_out, xmlBufContent(in), &c_in); 2505 xmlBufShrink(in, c_in); 2506 xmlBufAddLen(out, c_out); 2507 writtentot += c_out; 2508 if (ret == -1) { 2509 if (c_out > 0) { 2510 /* 2511 * Can be a limitation of iconv 2512 */ 2513 charref_len = 0; 2514 goto retry; 2515 } 2516 ret = -3; 2517 } 2518 } 2519 #endif /* LIBXML_ICONV_ENABLED */ 2520 #ifdef LIBXML_ICU_ENABLED 2521 else if (output->encoder->uconv_out != NULL) { 2522 ret = xmlUconvWrapper(output->encoder->uconv_out, 0, xmlBufEnd(out), 2523 &c_out, xmlBufContent(in), &c_in); 2524 xmlBufShrink(in, c_in); 2525 xmlBufAddLen(out, c_out); 2526 writtentot += c_out; 2527 if (ret == -1) { 2528 if (c_out > 0) { 2529 /* 2530 * Can be a limitation of uconv 2531 */ 2532 charref_len = 0; 2533 goto retry; 2534 } 2535 ret = -3; 2536 } 2537 } 2538 #endif /* LIBXML_ICU_ENABLED */ 2539 else { 2540 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2541 "xmlCharEncOutFunc: no output function !\n", NULL); 2542 return(-1); 2543 } 2544 2545 if (ret >= 0) output += ret; 2546 2547 /* 2548 * Attempt to handle error cases 2549 */ 2550 switch (ret) { 2551 case 0: 2552 #ifdef DEBUG_ENCODING 2553 xmlGenericError(xmlGenericErrorContext, 2554 "converted %d bytes to %d bytes of output\n", 2555 c_in, c_out); 2556 #endif 2557 break; 2558 case -1: 2559 #ifdef DEBUG_ENCODING 2560 xmlGenericError(xmlGenericErrorContext, 2561 "output conversion failed by lack of space\n"); 2562 #endif 2563 break; 2564 case -3: 2565 #ifdef DEBUG_ENCODING 2566 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2567 c_in, c_out, (int) xmlBufUse(in)); 2568 #endif 2569 break; 2570 case -2: { 2571 int len = (int) xmlBufUse(in); 2572 xmlChar *content = xmlBufContent(in); 2573 int cur; 2574 2575 cur = xmlGetUTF8Char(content, &len); 2576 if ((charref_len != 0) && (c_out < charref_len)) { 2577 /* 2578 * We attempted to insert a character reference and failed. 2579 * Undo what was written and skip the remaining charref. 2580 */ 2581 xmlBufErase(out, c_out); 2582 writtentot -= c_out; 2583 xmlBufShrink(in, charref_len - c_out); 2584 charref_len = 0; 2585 2586 ret = -1; 2587 break; 2588 } else if (cur > 0) { 2589 xmlChar charref[20]; 2590 2591 #ifdef DEBUG_ENCODING 2592 xmlGenericError(xmlGenericErrorContext, 2593 "handling output conversion error\n"); 2594 xmlGenericError(xmlGenericErrorContext, 2595 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2596 content[0], content[1], 2597 content[2], content[3]); 2598 #endif 2599 /* 2600 * Removes the UTF8 sequence, and replace it by a charref 2601 * and continue the transcoding phase, hoping the error 2602 * did not mangle the encoder state. 2603 */ 2604 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2605 "&#%d;", cur); 2606 xmlBufShrink(in, len); 2607 xmlBufAddHead(in, charref, -1); 2608 2609 goto retry; 2610 } else { 2611 char buf[50]; 2612 2613 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2614 content[0], content[1], 2615 content[2], content[3]); 2616 buf[49] = 0; 2617 xmlEncodingErr(XML_I18N_CONV_FAILED, 2618 "output conversion failed due to conv error, bytes %s\n", 2619 buf); 2620 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE) 2621 content[0] = ' '; 2622 } 2623 break; 2624 } 2625 } 2626 return(writtentot ? writtentot : ret); 2627 } 2628 #endif 2629 2630 /** 2631 * xmlCharEncOutFunc: 2632 * @handler: char enconding transformation data structure 2633 * @out: an xmlBuffer for the output. 2634 * @in: an xmlBuffer for the input 2635 * 2636 * Generic front-end for the encoding handler output function 2637 * a first call with @in == NULL has to be made firs to initiate the 2638 * output in case of non-stateless encoding needing to initiate their 2639 * state or the output (like the BOM in UTF16). 2640 * In case of UTF8 sequence conversion errors for the given encoder, 2641 * the content will be automatically remapped to a CharRef sequence. 2642 * 2643 * Returns the number of byte written if success, or 2644 * -1 general error 2645 * -2 if the transcoding fails (for *in is not valid utf8 string or 2646 * the result of transformation can't fit into the encoding we want), or 2647 */ 2648 int 2649 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, 2650 xmlBufferPtr in) { 2651 int ret = -2; 2652 int written; 2653 int writtentot = 0; 2654 int toconv; 2655 int output = 0; 2656 int charref_len = 0; 2657 2658 if (handler == NULL) return(-1); 2659 if (out == NULL) return(-1); 2660 2661 retry: 2662 2663 written = out->size - out->use; 2664 2665 if (written > 0) 2666 written--; /* Gennady: count '/0' */ 2667 2668 /* 2669 * First specific handling of in = NULL, i.e. the initialization call 2670 */ 2671 if (in == NULL) { 2672 toconv = 0; 2673 if (handler->output != NULL) { 2674 ret = handler->output(&out->content[out->use], &written, 2675 NULL, &toconv); 2676 if (ret >= 0) { /* Gennady: check return value */ 2677 out->use += written; 2678 out->content[out->use] = 0; 2679 } 2680 } 2681 #ifdef LIBXML_ICONV_ENABLED 2682 else if (handler->iconv_out != NULL) { 2683 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2684 &written, NULL, &toconv); 2685 out->use += written; 2686 out->content[out->use] = 0; 2687 } 2688 #endif /* LIBXML_ICONV_ENABLED */ 2689 #ifdef LIBXML_ICU_ENABLED 2690 else if (handler->uconv_out != NULL) { 2691 ret = xmlUconvWrapper(handler->uconv_out, 0, 2692 &out->content[out->use], 2693 &written, NULL, &toconv); 2694 out->use += written; 2695 out->content[out->use] = 0; 2696 } 2697 #endif /* LIBXML_ICU_ENABLED */ 2698 #ifdef DEBUG_ENCODING 2699 xmlGenericError(xmlGenericErrorContext, 2700 "initialized encoder\n"); 2701 #endif 2702 return(0); 2703 } 2704 2705 /* 2706 * Conversion itself. 2707 */ 2708 toconv = in->use; 2709 if (toconv == 0) 2710 return(0); 2711 if (toconv * 4 >= written) { 2712 xmlBufferGrow(out, toconv * 4); 2713 written = out->size - out->use - 1; 2714 } 2715 if (handler->output != NULL) { 2716 ret = handler->output(&out->content[out->use], &written, 2717 in->content, &toconv); 2718 if (written > 0) { 2719 xmlBufferShrink(in, toconv); 2720 out->use += written; 2721 writtentot += written; 2722 } 2723 out->content[out->use] = 0; 2724 } 2725 #ifdef LIBXML_ICONV_ENABLED 2726 else if (handler->iconv_out != NULL) { 2727 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use], 2728 &written, in->content, &toconv); 2729 xmlBufferShrink(in, toconv); 2730 out->use += written; 2731 writtentot += written; 2732 out->content[out->use] = 0; 2733 if (ret == -1) { 2734 if (written > 0) { 2735 /* 2736 * Can be a limitation of iconv 2737 */ 2738 charref_len = 0; 2739 goto retry; 2740 } 2741 ret = -3; 2742 } 2743 } 2744 #endif /* LIBXML_ICONV_ENABLED */ 2745 #ifdef LIBXML_ICU_ENABLED 2746 else if (handler->uconv_out != NULL) { 2747 ret = xmlUconvWrapper(handler->uconv_out, 0, 2748 &out->content[out->use], 2749 &written, in->content, &toconv); 2750 xmlBufferShrink(in, toconv); 2751 out->use += written; 2752 writtentot += written; 2753 out->content[out->use] = 0; 2754 if (ret == -1) { 2755 if (written > 0) { 2756 /* 2757 * Can be a limitation of iconv 2758 */ 2759 charref_len = 0; 2760 goto retry; 2761 } 2762 ret = -3; 2763 } 2764 } 2765 #endif /* LIBXML_ICU_ENABLED */ 2766 else { 2767 xmlEncodingErr(XML_I18N_NO_OUTPUT, 2768 "xmlCharEncOutFunc: no output function !\n", NULL); 2769 return(-1); 2770 } 2771 2772 if (ret >= 0) output += ret; 2773 2774 /* 2775 * Attempt to handle error cases 2776 */ 2777 switch (ret) { 2778 case 0: 2779 #ifdef DEBUG_ENCODING 2780 xmlGenericError(xmlGenericErrorContext, 2781 "converted %d bytes to %d bytes of output\n", 2782 toconv, written); 2783 #endif 2784 break; 2785 case -1: 2786 #ifdef DEBUG_ENCODING 2787 xmlGenericError(xmlGenericErrorContext, 2788 "output conversion failed by lack of space\n"); 2789 #endif 2790 break; 2791 case -3: 2792 #ifdef DEBUG_ENCODING 2793 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n", 2794 toconv, written, in->use); 2795 #endif 2796 break; 2797 case -2: { 2798 int len = in->use; 2799 const xmlChar *utf = (const xmlChar *) in->content; 2800 int cur; 2801 2802 cur = xmlGetUTF8Char(utf, &len); 2803 if ((charref_len != 0) && (written < charref_len)) { 2804 /* 2805 * We attempted to insert a character reference and failed. 2806 * Undo what was written and skip the remaining charref. 2807 */ 2808 out->use -= written; 2809 writtentot -= written; 2810 xmlBufferShrink(in, charref_len - written); 2811 charref_len = 0; 2812 2813 ret = -1; 2814 break; 2815 } else if (cur > 0) { 2816 xmlChar charref[20]; 2817 2818 #ifdef DEBUG_ENCODING 2819 xmlGenericError(xmlGenericErrorContext, 2820 "handling output conversion error\n"); 2821 xmlGenericError(xmlGenericErrorContext, 2822 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 2823 in->content[0], in->content[1], 2824 in->content[2], in->content[3]); 2825 #endif 2826 /* 2827 * Removes the UTF8 sequence, and replace it by a charref 2828 * and continue the transcoding phase, hoping the error 2829 * did not mangle the encoder state. 2830 */ 2831 charref_len = snprintf((char *) &charref[0], sizeof(charref), 2832 "&#%d;", cur); 2833 xmlBufferShrink(in, len); 2834 xmlBufferAddHead(in, charref, -1); 2835 2836 goto retry; 2837 } else { 2838 char buf[50]; 2839 2840 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X", 2841 in->content[0], in->content[1], 2842 in->content[2], in->content[3]); 2843 buf[49] = 0; 2844 xmlEncodingErr(XML_I18N_CONV_FAILED, 2845 "output conversion failed due to conv error, bytes %s\n", 2846 buf); 2847 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE) 2848 in->content[0] = ' '; 2849 } 2850 break; 2851 } 2852 } 2853 return(ret); 2854 } 2855 2856 /** 2857 * xmlCharEncCloseFunc: 2858 * @handler: char enconding transformation data structure 2859 * 2860 * Generic front-end for encoding handler close function 2861 * 2862 * Returns 0 if success, or -1 in case of error 2863 */ 2864 int 2865 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { 2866 int ret = 0; 2867 int tofree = 0; 2868 int i, handler_in_list = 0; 2869 2870 if (handler == NULL) return(-1); 2871 if (handler->name == NULL) return(-1); 2872 if (handlers != NULL) { 2873 for (i = 0;i < nbCharEncodingHandler; i++) { 2874 if (handler == handlers[i]) { 2875 handler_in_list = 1; 2876 break; 2877 } 2878 } 2879 } 2880 #ifdef LIBXML_ICONV_ENABLED 2881 /* 2882 * Iconv handlers can be used only once, free the whole block. 2883 * and the associated icon resources. 2884 */ 2885 if ((handler_in_list == 0) && 2886 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) { 2887 tofree = 1; 2888 if (handler->iconv_out != NULL) { 2889 if (iconv_close(handler->iconv_out)) 2890 ret = -1; 2891 handler->iconv_out = NULL; 2892 } 2893 if (handler->iconv_in != NULL) { 2894 if (iconv_close(handler->iconv_in)) 2895 ret = -1; 2896 handler->iconv_in = NULL; 2897 } 2898 } 2899 #endif /* LIBXML_ICONV_ENABLED */ 2900 #ifdef LIBXML_ICU_ENABLED 2901 if ((handler_in_list == 0) && 2902 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) { 2903 tofree = 1; 2904 if (handler->uconv_out != NULL) { 2905 closeIcuConverter(handler->uconv_out); 2906 handler->uconv_out = NULL; 2907 } 2908 if (handler->uconv_in != NULL) { 2909 closeIcuConverter(handler->uconv_in); 2910 handler->uconv_in = NULL; 2911 } 2912 } 2913 #endif 2914 if (tofree) { 2915 /* free up only dynamic handlers iconv/uconv */ 2916 if (handler->name != NULL) 2917 xmlFree(handler->name); 2918 handler->name = NULL; 2919 xmlFree(handler); 2920 } 2921 #ifdef DEBUG_ENCODING 2922 if (ret) 2923 xmlGenericError(xmlGenericErrorContext, 2924 "failed to close the encoding handler\n"); 2925 else 2926 xmlGenericError(xmlGenericErrorContext, 2927 "closed the encoding handler\n"); 2928 #endif 2929 2930 return(ret); 2931 } 2932 2933 /** 2934 * xmlByteConsumed: 2935 * @ctxt: an XML parser context 2936 * 2937 * This function provides the current index of the parser relative 2938 * to the start of the current entity. This function is computed in 2939 * bytes from the beginning starting at zero and finishing at the 2940 * size in byte of the file if parsing a file. The function is 2941 * of constant cost if the input is UTF-8 but can be costly if run 2942 * on non-UTF-8 input. 2943 * 2944 * Returns the index in bytes from the beginning of the entity or -1 2945 * in case the index could not be computed. 2946 */ 2947 long 2948 xmlByteConsumed(xmlParserCtxtPtr ctxt) { 2949 xmlParserInputPtr in; 2950 2951 if (ctxt == NULL) return(-1); 2952 in = ctxt->input; 2953 if (in == NULL) return(-1); 2954 if ((in->buf != NULL) && (in->buf->encoder != NULL)) { 2955 unsigned int unused = 0; 2956 xmlCharEncodingHandler * handler = in->buf->encoder; 2957 /* 2958 * Encoding conversion, compute the number of unused original 2959 * bytes from the input not consumed and substract that from 2960 * the raw consumed value, this is not a cheap operation 2961 */ 2962 if (in->end - in->cur > 0) { 2963 unsigned char convbuf[32000]; 2964 const unsigned char *cur = (const unsigned char *)in->cur; 2965 int toconv = in->end - in->cur, written = 32000; 2966 2967 int ret; 2968 2969 if (handler->output != NULL) { 2970 do { 2971 toconv = in->end - cur; 2972 written = 32000; 2973 ret = handler->output(&convbuf[0], &written, 2974 cur, &toconv); 2975 if (ret == -1) return(-1); 2976 unused += written; 2977 cur += toconv; 2978 } while (ret == -2); 2979 #ifdef LIBXML_ICONV_ENABLED 2980 } else if (handler->iconv_out != NULL) { 2981 do { 2982 toconv = in->end - cur; 2983 written = 32000; 2984 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0], 2985 &written, cur, &toconv); 2986 if (ret < 0) { 2987 if (written > 0) 2988 ret = -2; 2989 else 2990 return(-1); 2991 } 2992 unused += written; 2993 cur += toconv; 2994 } while (ret == -2); 2995 #endif 2996 #ifdef LIBXML_ICU_ENABLED 2997 } else if (handler->uconv_out != NULL) { 2998 do { 2999 toconv = in->end - cur; 3000 written = 32000; 3001 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0], 3002 &written, cur, &toconv); 3003 if (ret < 0) { 3004 if (written > 0) 3005 ret = -2; 3006 else 3007 return(-1); 3008 } 3009 unused += written; 3010 cur += toconv; 3011 } while (ret == -2); 3012 #endif 3013 } else { 3014 /* could not find a converter */ 3015 return(-1); 3016 } 3017 } 3018 if (in->buf->rawconsumed < unused) 3019 return(-1); 3020 return(in->buf->rawconsumed - unused); 3021 } 3022 return(in->consumed + (in->cur - in->base)); 3023 } 3024 3025 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) 3026 #ifdef LIBXML_ISO8859X_ENABLED 3027 3028 /** 3029 * UTF8ToISO8859x: 3030 * @out: a pointer to an array of bytes to store the result 3031 * @outlen: the length of @out 3032 * @in: a pointer to an array of UTF-8 chars 3033 * @inlen: the length of @in 3034 * @xlattable: the 2-level transcoding table 3035 * 3036 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-* 3037 * block of chars out. 3038 * 3039 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise 3040 * The value of @inlen after return is the number of octets consumed 3041 * as the return value is positive, else unpredictable. 3042 * The value of @outlen after return is the number of ocetes consumed. 3043 */ 3044 static int 3045 UTF8ToISO8859x(unsigned char* out, int *outlen, 3046 const unsigned char* in, int *inlen, 3047 unsigned char const *xlattable) { 3048 const unsigned char* outstart = out; 3049 const unsigned char* inend; 3050 const unsigned char* instart = in; 3051 const unsigned char* processed = in; 3052 3053 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3054 (xlattable == NULL)) 3055 return(-1); 3056 if (in == NULL) { 3057 /* 3058 * initialization nothing to do 3059 */ 3060 *outlen = 0; 3061 *inlen = 0; 3062 return(0); 3063 } 3064 inend = in + (*inlen); 3065 while (in < inend) { 3066 unsigned char d = *in++; 3067 if (d < 0x80) { 3068 *out++ = d; 3069 } else if (d < 0xC0) { 3070 /* trailing byte in leading position */ 3071 *outlen = out - outstart; 3072 *inlen = processed - instart; 3073 return(-2); 3074 } else if (d < 0xE0) { 3075 unsigned char c; 3076 if (!(in < inend)) { 3077 /* trailing byte not in input buffer */ 3078 *outlen = out - outstart; 3079 *inlen = processed - instart; 3080 return(-3); 3081 } 3082 c = *in++; 3083 if ((c & 0xC0) != 0x80) { 3084 /* not a trailing byte */ 3085 *outlen = out - outstart; 3086 *inlen = processed - instart; 3087 return(-2); 3088 } 3089 c = c & 0x3F; 3090 d = d & 0x1F; 3091 d = xlattable [48 + c + xlattable [d] * 64]; 3092 if (d == 0) { 3093 /* not in character set */ 3094 *outlen = out - outstart; 3095 *inlen = processed - instart; 3096 return(-2); 3097 } 3098 *out++ = d; 3099 } else if (d < 0xF0) { 3100 unsigned char c1; 3101 unsigned char c2; 3102 if (!(in < inend - 1)) { 3103 /* trailing bytes not in input buffer */ 3104 *outlen = out - outstart; 3105 *inlen = processed - instart; 3106 return(-3); 3107 } 3108 c1 = *in++; 3109 if ((c1 & 0xC0) != 0x80) { 3110 /* not a trailing byte (c1) */ 3111 *outlen = out - outstart; 3112 *inlen = processed - instart; 3113 return(-2); 3114 } 3115 c2 = *in++; 3116 if ((c2 & 0xC0) != 0x80) { 3117 /* not a trailing byte (c2) */ 3118 *outlen = out - outstart; 3119 *inlen = processed - instart; 3120 return(-2); 3121 } 3122 c1 = c1 & 0x3F; 3123 c2 = c2 & 0x3F; 3124 d = d & 0x0F; 3125 d = xlattable [48 + c2 + xlattable [48 + c1 + 3126 xlattable [32 + d] * 64] * 64]; 3127 if (d == 0) { 3128 /* not in character set */ 3129 *outlen = out - outstart; 3130 *inlen = processed - instart; 3131 return(-2); 3132 } 3133 *out++ = d; 3134 } else { 3135 /* cannot transcode >= U+010000 */ 3136 *outlen = out - outstart; 3137 *inlen = processed - instart; 3138 return(-2); 3139 } 3140 processed = in; 3141 } 3142 *outlen = out - outstart; 3143 *inlen = processed - instart; 3144 return(*outlen); 3145 } 3146 3147 /** 3148 * ISO8859xToUTF8 3149 * @out: a pointer to an array of bytes to store the result 3150 * @outlen: the length of @out 3151 * @in: a pointer to an array of ISO Latin 1 chars 3152 * @inlen: the length of @in 3153 * 3154 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8 3155 * block of chars out. 3156 * Returns 0 if success, or -1 otherwise 3157 * The value of @inlen after return is the number of octets consumed 3158 * The value of @outlen after return is the number of ocetes produced. 3159 */ 3160 static int 3161 ISO8859xToUTF8(unsigned char* out, int *outlen, 3162 const unsigned char* in, int *inlen, 3163 unsigned short const *unicodetable) { 3164 unsigned char* outstart = out; 3165 unsigned char* outend; 3166 const unsigned char* instart = in; 3167 const unsigned char* inend; 3168 const unsigned char* instop; 3169 unsigned int c; 3170 3171 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || 3172 (in == NULL) || (unicodetable == NULL)) 3173 return(-1); 3174 outend = out + *outlen; 3175 inend = in + *inlen; 3176 instop = inend; 3177 3178 while ((in < inend) && (out < outend - 2)) { 3179 if (*in >= 0x80) { 3180 c = unicodetable [*in - 0x80]; 3181 if (c == 0) { 3182 /* undefined code point */ 3183 *outlen = out - outstart; 3184 *inlen = in - instart; 3185 return (-1); 3186 } 3187 if (c < 0x800) { 3188 *out++ = ((c >> 6) & 0x1F) | 0xC0; 3189 *out++ = (c & 0x3F) | 0x80; 3190 } else { 3191 *out++ = ((c >> 12) & 0x0F) | 0xE0; 3192 *out++ = ((c >> 6) & 0x3F) | 0x80; 3193 *out++ = (c & 0x3F) | 0x80; 3194 } 3195 ++in; 3196 } 3197 if (instop - in > outend - out) instop = in + (outend - out); 3198 while ((*in < 0x80) && (in < instop)) { 3199 *out++ = *in++; 3200 } 3201 } 3202 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3203 *out++ = *in++; 3204 } 3205 if ((in < inend) && (out < outend) && (*in < 0x80)) { 3206 *out++ = *in++; 3207 } 3208 *outlen = out - outstart; 3209 *inlen = in - instart; 3210 return (*outlen); 3211 } 3212 3213 3214 /************************************************************************ 3215 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding * 3216 ************************************************************************/ 3217 3218 static unsigned short const xmlunicodetable_ISO8859_2 [128] = { 3219 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3220 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3221 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3222 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3223 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 3224 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, 3225 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 3226 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, 3227 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 3228 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, 3229 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 3230 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, 3231 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 3232 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, 3233 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 3234 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, 3235 }; 3236 3237 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = { 3238 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3245 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3246 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3247 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3248 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3249 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef" 3250 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00" 3251 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3252 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00" 3253 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3254 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00" 3255 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3257 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00" 3258 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba" 3259 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9" 3260 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00" 3261 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3262 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf" 3263 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00" 3264 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00" 3265 }; 3266 3267 static unsigned short const xmlunicodetable_ISO8859_3 [128] = { 3268 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3269 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3270 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3271 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3272 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7, 3273 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b, 3274 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7, 3275 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c, 3276 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7, 3277 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3278 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7, 3279 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df, 3280 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7, 3281 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3282 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7, 3283 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9, 3284 }; 3285 3286 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = { 3287 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00" 3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3294 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3295 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3296 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00" 3297 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00" 3298 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00" 3299 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb" 3300 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00" 3301 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3303 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00" 3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3306 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3308 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba" 3312 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00" 3313 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00" 3314 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3315 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3316 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3317 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00" 3318 }; 3319 3320 static unsigned short const xmlunicodetable_ISO8859_4 [128] = { 3321 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3322 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3323 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3324 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3325 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7, 3326 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af, 3327 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7, 3328 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b, 3329 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3330 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a, 3331 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3332 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df, 3333 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3334 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b, 3335 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3336 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9, 3337 }; 3338 3339 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = { 3340 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00" 3341 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3342 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3343 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3344 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3345 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3346 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3347 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3348 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3349 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf" 3350 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00" 3351 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3352 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3353 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7" 3354 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00" 3355 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00" 3356 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00" 3357 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00" 3358 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00" 3359 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00" 3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3363 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00" 3364 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf" 3365 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00" 3366 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00" 3367 }; 3368 3369 static unsigned short const xmlunicodetable_ISO8859_5 [128] = { 3370 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3371 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3372 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3373 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3374 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 3375 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f, 3376 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 3377 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 3378 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 3379 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 3380 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 3381 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 3382 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 3383 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 3384 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 3385 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f, 3386 }; 3387 3388 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = { 3389 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3390 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3391 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3392 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3393 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3396 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3397 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3398 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00" 3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3400 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf" 3401 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3402 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3403 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3404 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3405 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff" 3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3408 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3409 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3410 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3413 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3416 }; 3417 3418 static unsigned short const xmlunicodetable_ISO8859_6 [128] = { 3419 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3420 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3421 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3422 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3423 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000, 3424 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000, 3425 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3426 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f, 3427 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 3428 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, 3429 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 3430 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3431 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 3432 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 3433 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3434 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3435 }; 3436 3437 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = { 3438 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00" 3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3443 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3445 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3446 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3447 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00" 3448 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3449 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3450 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3451 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3452 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00" 3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf" 3455 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3456 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00" 3457 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3458 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3461 }; 3462 3463 static unsigned short const xmlunicodetable_ISO8859_7 [128] = { 3464 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3465 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3466 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3467 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3468 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7, 3469 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015, 3470 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7, 3471 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f, 3472 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 3473 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 3474 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 3475 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, 3476 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 3477 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 3478 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 3479 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000, 3480 }; 3481 3482 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = { 3483 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06" 3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3485 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3490 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3491 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3492 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00" 3493 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00" 3494 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3495 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3496 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3497 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3498 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3499 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00" 3500 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3501 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3502 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3503 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3504 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3505 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3506 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf" 3507 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3508 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3509 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3510 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00" 3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3514 }; 3515 3516 static unsigned short const xmlunicodetable_ISO8859_8 [128] = { 3517 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3518 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3519 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3520 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3521 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3522 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3523 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3524 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000, 3525 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3526 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3527 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 3528 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017, 3529 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 3530 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 3531 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 3532 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000, 3533 }; 3534 3535 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = { 3536 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3537 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00" 3538 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3542 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3543 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3544 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3545 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf" 3546 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00" 3547 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3548 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3549 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3552 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00" 3553 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3554 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00" 3555 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3557 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe" 3560 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00" 3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3564 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3565 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00" 3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3567 }; 3568 3569 static unsigned short const xmlunicodetable_ISO8859_9 [128] = { 3570 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3571 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3572 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3573 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3574 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 3575 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3576 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 3577 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 3578 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3579 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3580 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3581 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 3582 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3583 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3584 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3585 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff, 3586 }; 3587 3588 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = { 3589 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3590 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3595 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3596 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3597 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3598 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3599 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3600 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3601 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf" 3602 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3603 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff" 3604 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3605 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0" 3606 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3607 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3608 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3609 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe" 3610 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3612 }; 3613 3614 static unsigned short const xmlunicodetable_ISO8859_10 [128] = { 3615 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3616 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3617 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3618 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3619 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7, 3620 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a, 3621 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7, 3622 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b, 3623 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e, 3624 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf, 3625 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168, 3626 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3627 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f, 3628 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef, 3629 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169, 3630 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138, 3631 }; 3632 3633 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = { 3634 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3635 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3636 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3637 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3638 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3639 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3640 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3641 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3642 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3643 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00" 3644 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00" 3645 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00" 3646 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00" 3647 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7" 3648 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00" 3649 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00" 3650 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3651 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00" 3652 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00" 3653 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3655 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3656 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3657 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3658 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3659 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3660 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3661 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf" 3662 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf" 3663 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef" 3664 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00" 3665 }; 3666 3667 static unsigned short const xmlunicodetable_ISO8859_11 [128] = { 3668 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3669 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3670 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3671 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3672 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07, 3673 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f, 3674 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17, 3675 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f, 3676 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27, 3677 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f, 3678 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37, 3679 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f, 3680 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47, 3681 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f, 3682 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57, 3683 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000, 3684 }; 3685 3686 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = { 3687 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3689 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3691 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3694 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3695 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3696 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3698 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00" 3702 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" 3703 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" 3704 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3705 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf" 3706 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3707 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3708 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3709 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3710 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3711 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00" 3712 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3713 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3714 }; 3715 3716 static unsigned short const xmlunicodetable_ISO8859_13 [128] = { 3717 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3718 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3719 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3720 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3721 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7, 3722 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6, 3723 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7, 3724 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6, 3725 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112, 3726 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b, 3727 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7, 3728 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df, 3729 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113, 3730 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c, 3731 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7, 3732 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019, 3733 }; 3734 3735 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = { 3736 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3738 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3741 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3743 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3744 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3745 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00" 3746 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00" 3747 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00" 3753 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3754 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3755 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00" 3756 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf" 3757 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00" 3758 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00" 3759 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00" 3760 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00" 3761 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00" 3762 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00" 3763 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00" 3764 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00" 3765 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1" 3766 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00" 3767 }; 3768 3769 static unsigned short const xmlunicodetable_ISO8859_14 [128] = { 3770 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3771 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3772 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3773 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3774 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7, 3775 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178, 3776 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56, 3777 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61, 3778 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3779 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3780 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a, 3781 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df, 3782 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3783 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3784 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b, 3785 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff, 3786 }; 3787 3788 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = { 3789 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3791 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3792 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3793 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3794 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3796 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3797 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3798 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00" 3799 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3801 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3803 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00" 3804 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00" 3805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1" 3806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00" 3809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3810 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3812 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3814 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3817 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3818 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3819 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3820 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3821 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3822 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3823 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00" 3824 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3825 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00" 3826 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3827 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3828 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3829 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf" 3830 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3831 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff" 3832 }; 3833 3834 static unsigned short const xmlunicodetable_ISO8859_15 [128] = { 3835 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3836 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3837 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3838 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3839 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 3840 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 3841 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 3842 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, 3843 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 3844 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3845 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 3846 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 3847 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 3848 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3849 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 3850 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 3851 }; 3852 3853 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = { 3854 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3856 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3861 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3862 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3863 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf" 3864 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf" 3865 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3866 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3867 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3868 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3869 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3870 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3871 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3872 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3873 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3874 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3875 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3876 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00" 3877 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3878 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" 3879 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3880 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" 3881 }; 3882 3883 static unsigned short const xmlunicodetable_ISO8859_16 [128] = { 3884 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 3885 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 3886 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 3887 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 3888 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7, 3889 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b, 3890 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7, 3891 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c, 3892 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7, 3893 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 3894 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a, 3895 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df, 3896 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7, 3897 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 3898 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b, 3899 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff, 3900 }; 3901 3902 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = { 3903 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00" 3904 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3905 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3906 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3910 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" 3911 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" 3912 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00" 3913 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00" 3914 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00" 3915 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00" 3916 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3917 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3918 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3919 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00" 3920 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3921 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00" 3922 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3923 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3924 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3925 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3926 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3927 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3928 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00" 3929 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3930 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3931 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00" 3932 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3933 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3934 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3935 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00" 3936 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3937 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 3938 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" 3939 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf" 3940 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" 3941 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff" 3942 }; 3943 3944 3945 /* 3946 * auto-generated functions for ISO-8859-2 .. ISO-8859-16 3947 */ 3948 3949 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen, 3950 const unsigned char* in, int *inlen) { 3951 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2); 3952 } 3953 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen, 3954 const unsigned char* in, int *inlen) { 3955 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2); 3956 } 3957 3958 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen, 3959 const unsigned char* in, int *inlen) { 3960 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3); 3961 } 3962 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen, 3963 const unsigned char* in, int *inlen) { 3964 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3); 3965 } 3966 3967 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen, 3968 const unsigned char* in, int *inlen) { 3969 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4); 3970 } 3971 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen, 3972 const unsigned char* in, int *inlen) { 3973 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4); 3974 } 3975 3976 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen, 3977 const unsigned char* in, int *inlen) { 3978 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5); 3979 } 3980 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen, 3981 const unsigned char* in, int *inlen) { 3982 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5); 3983 } 3984 3985 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen, 3986 const unsigned char* in, int *inlen) { 3987 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6); 3988 } 3989 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen, 3990 const unsigned char* in, int *inlen) { 3991 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6); 3992 } 3993 3994 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen, 3995 const unsigned char* in, int *inlen) { 3996 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7); 3997 } 3998 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen, 3999 const unsigned char* in, int *inlen) { 4000 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7); 4001 } 4002 4003 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen, 4004 const unsigned char* in, int *inlen) { 4005 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8); 4006 } 4007 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen, 4008 const unsigned char* in, int *inlen) { 4009 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8); 4010 } 4011 4012 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen, 4013 const unsigned char* in, int *inlen) { 4014 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9); 4015 } 4016 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen, 4017 const unsigned char* in, int *inlen) { 4018 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9); 4019 } 4020 4021 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen, 4022 const unsigned char* in, int *inlen) { 4023 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10); 4024 } 4025 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen, 4026 const unsigned char* in, int *inlen) { 4027 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10); 4028 } 4029 4030 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen, 4031 const unsigned char* in, int *inlen) { 4032 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11); 4033 } 4034 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen, 4035 const unsigned char* in, int *inlen) { 4036 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11); 4037 } 4038 4039 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen, 4040 const unsigned char* in, int *inlen) { 4041 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13); 4042 } 4043 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen, 4044 const unsigned char* in, int *inlen) { 4045 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13); 4046 } 4047 4048 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen, 4049 const unsigned char* in, int *inlen) { 4050 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14); 4051 } 4052 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen, 4053 const unsigned char* in, int *inlen) { 4054 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14); 4055 } 4056 4057 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen, 4058 const unsigned char* in, int *inlen) { 4059 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15); 4060 } 4061 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen, 4062 const unsigned char* in, int *inlen) { 4063 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15); 4064 } 4065 4066 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen, 4067 const unsigned char* in, int *inlen) { 4068 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16); 4069 } 4070 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen, 4071 const unsigned char* in, int *inlen) { 4072 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16); 4073 } 4074 4075 static void 4076 xmlRegisterCharEncodingHandlersISO8859x (void) { 4077 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2); 4078 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3); 4079 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4); 4080 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5); 4081 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6); 4082 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7); 4083 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8); 4084 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9); 4085 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10); 4086 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11); 4087 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13); 4088 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14); 4089 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15); 4090 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16); 4091 } 4092 4093 #endif 4094 #endif 4095 4096 #define bottom_encoding 4097 #include "elfgcchack.h"