/ CFXMLParser.c
CFXMLParser.c
   1  /*
   2   * Copyright (c) 2015 Apple Inc. All rights reserved.
   3   *
   4   * @APPLE_LICENSE_HEADER_START@
   5   *
   6   * This file contains Original Code and/or Modifications of Original Code
   7   * as defined in and that are subject to the Apple Public Source License
   8   * Version 2.0 (the 'License'). You may not use this file except in
   9   * compliance with the License. Please obtain a copy of the License at
  10   * http://www.opensource.apple.com/apsl/ and read it before using this
  11   * file.
  12   *
  13   * The Original Code and all software distributed under the License are
  14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18   * Please see the License for the specific language governing rights and
  19   * limitations under the License.
  20   *
  21   * @APPLE_LICENSE_HEADER_END@
  22   */
  23  
  24  /*	CFXMLParser.c
  25  	Copyright (c) 1999-2014, Apple Inc. All rights reserved.
  26  	Responsibility: David Smith
  27  */
  28  
  29  #include <CoreFoundation/CFXMLParser.h>
  30  #include <CoreFoundation/CFNumber.h>
  31  #include "CFXMLInputStream.h"
  32  #include "CFUniChar.h" 
  33  #include "CFInternal.h"
  34  
  35  #pragma GCC diagnostic push
  36  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  37  
  38  struct __CFXMLParser {
  39      CFRuntimeBase _cfBase;
  40  
  41      _CFXMLInputStream input;
  42      
  43      void **stack;
  44      void **top;
  45      UInt32 capacity;
  46  
  47      struct __CFXMLNode *node;  // Our private node; we use it to report back information
  48      CFMutableDictionaryRef argDict;
  49      CFMutableArrayRef argArray;
  50  
  51      UInt32 options;
  52      CFXMLParserCallBacks callBacks;
  53      CFXMLParserContext context;
  54  
  55      CFXMLParserStatusCode status;
  56      CFStringRef errorString;
  57  };
  58  
  59  static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
  60      const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf;
  61      return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser);
  62  }
  63  
  64  static void __CFXMLParserDeallocate(CFTypeRef cf) {
  65      struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
  66      CFAllocatorRef alloc = CFGetAllocator(parser);
  67      _freeInputStream(&(parser->input));
  68      if (parser->argDict) CFRelease(parser->argDict);
  69      if (parser->argArray) CFRelease(parser->argArray);
  70      if (parser->errorString) CFRelease(parser->errorString);
  71      if (parser->node) CFRelease(parser->node);
  72      CFAllocatorDeallocate(alloc, parser->stack);
  73      if (parser->context.info && parser->context.release) {
  74          parser->context.release(parser->context.info);
  75      }
  76  }
  77  
  78  static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
  79  
  80  static const CFRuntimeClass __CFXMLParserClass = {
  81      0,
  82      "CFXMLParser",
  83      NULL,      // init
  84      NULL,      // copy
  85      __CFXMLParserDeallocate,
  86      NULL,
  87      NULL,
  88      NULL,      // 
  89      __CFXMLParserCopyDescription
  90  };
  91  
  92  CFTypeID CFXMLParserGetTypeID(void) {
  93      static dispatch_once_t initOnce;
  94      dispatch_once(&initOnce, ^{ __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass); });
  95      return __kCFXMLParserTypeID;
  96  }
  97  
  98  void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
  99      CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
 100      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 101      if (context) {
 102  	context->version = parser->context.version;
 103  	context->info = parser->context.info;
 104  	context->retain = parser->context.retain;
 105  	context->release = parser->context.release;
 106  	context->copyDescription = parser->context.copyDescription;
 107  	UNFAULT_CALLBACK(context->retain);
 108  	UNFAULT_CALLBACK(context->release);
 109  	UNFAULT_CALLBACK(context->copyDescription);
 110      }
 111  }
 112  
 113  void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
 114      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 115      if (callBacks) {
 116          callBacks->version = parser->callBacks.version;
 117          callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
 118          callBacks->addChild = parser->callBacks.addChild;
 119          callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
 120          callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
 121          callBacks->handleError = parser->callBacks.handleError;
 122  	UNFAULT_CALLBACK(callBacks->createXMLStructure);
 123  	UNFAULT_CALLBACK(callBacks->addChild);
 124  	UNFAULT_CALLBACK(callBacks->endXMLStructure);
 125  	UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
 126  	UNFAULT_CALLBACK(callBacks->handleError);
 127      }
 128  }
 129  
 130  CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
 131      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 132      return parser->input.url;
 133  }
 134  
 135  /* Returns the character index or line number of the current parse location */
 136  CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
 137      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 138      return _inputStreamCurrentLocation(&parser->input);
 139  }
 140  
 141  CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
 142      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 143      return _inputStreamCurrentLine(&parser->input);
 144  }
 145  
 146  /* Returns the top-most object returned by the createXMLStructure callback */
 147  void *CFXMLParserGetDocument(CFXMLParserRef parser) {
 148      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 149      if (parser->capacity > 0)
 150          return parser->stack[0];
 151      else
 152          return NULL;
 153  }
 154  
 155  CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
 156      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 157      return parser->status;
 158  }
 159  
 160  CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
 161      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 162      return (CFStringRef)CFRetain(parser->errorString);
 163  }
 164  
 165  void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
 166      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 167      CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
 168      CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
 169      __CFGenericValidateType(errorDescription, CFStringGetTypeID());
 170  
 171      parser->status = errorCode;
 172      if (parser->errorString) CFRelease(parser->errorString);
 173      parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
 174  }
 175  
 176  
 177  static Boolean parseXML(CFXMLParserRef parser);
 178  static Boolean parseComment(CFXMLParserRef parser, Boolean report);
 179  static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
 180  static Boolean parseInlineDTD(CFXMLParserRef parser);
 181  static Boolean parseDTD(CFXMLParserRef parser);
 182  static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
 183  static Boolean parseCDSect(CFXMLParserRef parser);
 184  static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
 185  static Boolean parsePCData(CFXMLParserRef parser);
 186  static Boolean parseWhitespace(CFXMLParserRef parser);
 187  static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
 188  static Boolean parseNotationDeclaration(CFXMLParserRef parser);
 189  static Boolean parseElementDeclaration(CFXMLParserRef parser);
 190  static Boolean parseEntityDeclaration(CFXMLParserRef parser);
 191  static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
 192  static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
 193  static Boolean parseTagContent(CFXMLParserRef parser);
 194  static Boolean parseTag(CFXMLParserRef parser);
 195  static Boolean parseAttributes(CFXMLParserRef parser);
 196  static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
 197  
 198  // Utilities; may need to make these accessible to the property list parser to avoid code duplication
 199  static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
 200  static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
 201  static void pushXMLNode(CFXMLParserRef parser, void *node);
 202  
 203  static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
 204      struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
 205      struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
 206      UniChar *buf;
 207      if (parser && node) {
 208          alloc = CFGetAllocator(parser);
 209          _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
 210          parser->top = parser->stack;
 211          parser->stack = NULL;
 212          parser->capacity = 0;
 213  
 214          buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
 215          parser->node = node;
 216          parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
 217          parser->node->additionalData = NULL;
 218          parser->node->version = version;
 219          parser->argDict = NULL; // don't create these until necessary
 220          parser->argArray = NULL;
 221  
 222          parser->options = options;
 223          parser->callBacks = *callBacks;
 224  
 225          FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
 226          FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
 227          FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
 228          FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
 229          FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
 230          
 231          if (context) {
 232              parser->context = *context;
 233              if (parser->context.info && parser->context.retain) {
 234                  parser->context.retain(parser->context.info);
 235              }
 236          } else {
 237              parser->context.version = 0;
 238              parser->context.info = NULL;
 239              parser->context.retain = NULL;
 240              parser->context.release = NULL;
 241              parser->context.copyDescription = NULL;
 242          }
 243          parser->status = kCFXMLStatusParseNotBegun;
 244          parser->errorString = NULL;
 245      } else {
 246          if (parser) CFRelease(parser);
 247          if (node) CFRelease(node);
 248          parser = NULL;
 249      }
 250      return parser;
 251  }
 252  
 253  CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
 254      CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
 255      __CFGenericValidateType(xmlData, CFDataGetTypeID());
 256      CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
 257      CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
 258      CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
 259      CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
 260      return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
 261  }
 262  
 263  CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
 264      CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
 265      CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
 266      CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
 267      CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
 268  
 269      return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
 270  }
 271  
 272  Boolean CFXMLParserParse(CFXMLParserRef parser) {
 273      CFXMLDocumentInfo docData;
 274      __CFGenericValidateType(parser, CFXMLParserGetTypeID());
 275      if (parser->status != kCFXMLStatusParseNotBegun) return false;
 276      parser->status = kCFXMLStatusParseInProgress;
 277  
 278      if (!_openInputStream(&parser->input)) {
 279          if (!parser->input.data) {
 280              // couldn't load URL
 281              parser->status = kCFXMLErrorNoData;
 282              parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
 283          } else {
 284              // couldn't figure out the encoding
 285              CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
 286              parser->status = kCFXMLErrorUnknownEncoding;
 287              parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
 288          }
 289          if (parser->callBacks.handleError) {
 290              INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
 291          }
 292          return false;
 293      }
 294      
 295      // Create the document
 296      parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
 297      parser->capacity = 16;
 298      parser->node->dataTypeID = kCFXMLNodeTypeDocument;
 299      docData.encoding = _inputStreamGetEncoding(&parser->input);
 300      docData.sourceURL = parser->input.url;
 301      parser->node->additionalData = &docData;
 302      parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
 303      parser->top = parser->stack;
 304      parser->node->additionalData = NULL;
 305  
 306      // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
 307      if (parser->status != kCFXMLStatusParseInProgress) {
 308          _CFReportError(parser, parser->status, NULL);
 309          return false;
 310      }
 311      return parseXML(parser);
 312  }
 313  
  314  /* The next several functions are all intended to parse past a particular XML structure.  They expect the parser's input stream (parser->input) to be positioned at the first content character of their structure (e.g. parseComment expects the stream to be positioned just past "<!--").  They parse to the end of their structure, calling any necessary callbacks along the way, and advancing the input stream as they go.  They either return void (not possible for the parse to fail) or they return a Boolean (success/failure).  The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
 315  
 316  // [3]  S ::= (#x20 | #x9 | #xD | #xA)+ 
 317  static Boolean parseWhitespace(CFXMLParserRef parser) {
 318      CFIndex len;
 319      Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
 320      len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
 321      if (report && len) {
 322          parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
 323          parser->node->additionalData = NULL;
 324          return reportNewLeaf(parser);
 325      } else {
 326          return true;
 327      }
 328  }
 329  
 330  // parser should be just past "<!--"
 331  static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
 332      const UniChar dashes[2] = {'-', '-'};
 333      UniChar ch;
 334      report = report && (!(parser->options & kCFXMLParserSkipMetaData));
 335      if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
 336          _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
 337          return false;
 338      } else if (ch != '>') {
 339          _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
 340          return false;
 341      } else if (report) {
 342          parser->node->dataTypeID = kCFXMLNodeTypeComment;
 343          parser->node->additionalData = NULL;
 344          return reportNewLeaf(parser);
 345      } else {
 346          return true;
 347      }
 348  }
 349  
 350  /* 
 351  [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
 352  [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
 353   */
 354  // parser should be set to the first character after "<?"
 355  static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
 356      const UniChar piTermination[2] = {'?', '>'};
 357      CFMutableStringRef str;
 358      CFStringRef name;
 359      
 360      if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
 361          _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
 362          return false;
 363      }
 364      _inputStreamSkipWhitespace(&parser->input, NULL);
 365      str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
 366      if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
 367          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
 368          if (str) CFRelease(str);
 369          return false;
 370      }
 371  
 372      if (str) {
 373          CFXMLProcessingInstructionInfo data;
 374          Boolean result;
 375          CFStringRef tmp = parser->node->dataString;
 376          parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
 377          parser->node->dataString = name;
 378          data.dataString = str;
 379          parser->node->additionalData = &data;
 380          result = reportNewLeaf(parser);
 381          parser->node->additionalData = NULL;
 382          parser->node->dataString = tmp;
 383          CFRelease(str);
 384          return result;
 385      } else {
 386          return true;
 387      }
 388  }
 389  
 390  /*
 391   [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 392  */
 393  static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
 394  // first character should be immediately after the "<!"
 395  static Boolean parseDTD(CFXMLParserRef parser) {
 396      UniChar ch;
 397      Boolean success, hasExtID = false;
 398      CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
 399      void *dtdStructure = NULL;
 400      CFStringRef name;
 401      
 402      // First pass "DOCTYPE"
 403      success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
 404      success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
 405      success = success && _inputStreamScanXMLName(&parser->input, false, &name);
 406      if (success) {
 407          _inputStreamSkipWhitespace(&parser->input, NULL);
 408          success = _inputStreamPeekCharacter(&parser->input, &ch);
 409      } else {
 410          // didn't make it past "DOCTYPE" successfully.
 411          _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
 412          return false;
 413      }
 414      if (success && ch != '[' && ch != '>') {
 415          // ExternalID
 416          hasExtID = true;
 417          success = parseExternalID(parser, false, &(docData.externalID));
 418          if (success)  {
 419              _inputStreamSkipWhitespace(&parser->input, NULL);
 420              success = _inputStreamPeekCharacter(&parser->input, &ch);
 421          }
 422      }
 423  
 424      if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
 425          CFStringRef tmp = parser->node->dataString;
 426          parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
 427          parser->node->dataString = name;
 428          parser->node->additionalData = &docData;
 429          dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
 430          if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
 431              INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
 432          }
 433          parser->node->additionalData = NULL;
 434          parser->node->dataString = tmp;
 435          if (parser->status != kCFXMLStatusParseInProgress) {
 436              // callback called CFXMLParserAbort()
 437              _CFReportError(parser, parser->status, NULL);
 438              return false;
 439          }
 440      } else {
 441          dtdStructure = NULL;
 442      }
 443      if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
 444      if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
 445      pushXMLNode(parser, dtdStructure);
 446  
 447      if (success && ch == '[')  {
 448          // inline DTD
 449          _inputStreamGetCharacter(&parser->input, &ch);
 450          if (!parseInlineDTD(parser)) return false;
 451          _inputStreamSkipWhitespace(&parser->input, NULL);
 452          success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
 453      } else if (success && ch == '>') {
 454          // End of the DTD
 455          _inputStreamGetCharacter(&parser->input, &ch);
 456      }
 457      if (!success) {
 458          if (_inputStreamAtEOF(&parser->input)) {
 459              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
 460          } else {
 461              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
 462          }
 463          return false;
 464      }
 465  
 466      parser->top --; // Remove dtdStructure from the stack
 467  
 468      if (success && dtdStructure) {
 469          INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
 470          if (parser->status != kCFXMLStatusParseInProgress) {
 471              _CFReportError(parser, parser->status, NULL);
 472              return false;
 473          }
 474      }
 475      return true;
 476  }
 477  
 478  /*
 479   [69] PEReference ::= '%' Name ';'
 480  */
 481  static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
 482      UniChar ch;
 483      CFStringRef name;
 484      if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
 485          _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
 486          return false;
 487      } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
 488          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
 489          return false;
 490      } else if (ch != ';') {
 491          _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
 492          return false;
 493      } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
 494          CFXMLEntityReferenceInfo myData;
 495          Boolean result;
 496          CFStringRef tmp = parser->node->dataString;
 497          parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
 498          parser->node->dataString = name;
 499          myData.entityType = kCFXMLEntityTypeParameter;
 500          parser->node->additionalData = &myData;
 501          result = reportNewLeaf(parser);
 502          parser->node->additionalData = NULL;
 503          parser->node->dataString = tmp;
 504          return result;
 505      } else {
 506          return true;
 507      }
 508  }
 509  
 510  /*
 511   [54] AttType ::= StringType | TokenizedType | EnumeratedType
 512   [55] StringType ::= 'CDATA'
 513   [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
 514   [57] EnumeratedType ::= NotationType | Enumeration
 515   [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
 516   [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
 517  */
 518  static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
 519      UniChar ch;
 520      Boolean done = false;
 521      if (!_inputStreamGetCharacter(&parser->input, &ch)) {
 522          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 523          return false;
 524      } else if (ch != '(') {
 525          _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 526          return false;
 527      }
 528      _inputStreamSkipWhitespace(&parser->input, NULL);
 529      if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
 530          _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 531          return false;
 532      }
 533      while (!done) {
 534          _inputStreamSkipWhitespace(&parser->input, NULL);
 535          if (!_inputStreamGetCharacter(&parser->input, &ch)) {
 536              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 537              return false;
 538          } else if (ch == ')') {
 539              done = true;
 540          } else if (ch == '|') {
 541              _inputStreamSkipWhitespace(&parser->input, NULL);
 542              if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
 543                  _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 544                  return false;
 545              }
 546          } else {
 547              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 548              return false;
 549          }
 550      }
 551      return true;
 552  }
 553  
 554  static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
 555      Boolean success = false;
 556      static const UniChar attTypeStrings[6][8] = {
 557      {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
 558      {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
 559      {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
 560      {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
 561      {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
 562      {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
 563      if (str) _inputStreamSetMark(&parser->input);
 564      if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
 565          _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
 566          _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
 567          _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
 568          _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
 569          _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
 570          _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
 571          _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
 572          success = true;
 573      } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
 574          // Notation
 575          if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
 576              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 577              success = false;
 578          } else  {
 579              success = parseEnumeration(parser, false);
 580          }
 581      } else {
 582          success = parseEnumeration(parser, true);
 583      }
 584      if (str) {
 585          if (success) {
 586              _inputStreamGetCharactersFromMark(&parser->input, str);
 587          }
 588          _inputStreamClearMark(&parser->input);
 589      }
 590      return success;
 591  }
 592  
 593  /*  [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
 594  static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
 595      const UniChar strings[3][8] = {
 596      {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
 597      {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
 598      {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
 599      UniChar ch;
 600      Boolean success;
 601      if (str) _inputStreamSetMark(&parser->input);
 602      if (!_inputStreamGetCharacter(&parser->input, &ch)) {
 603          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 604          success = false;
 605      } else if (ch == '#') {
 606          if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
 607              _inputStreamMatchString(&parser->input, strings[1], 7)) {
 608              success = true;
 609          } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
 610              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 611              success = false;
 612          } else {
 613              // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
 614              success = parseAttributeValue(parser, NULL);
 615          }
 616      } else {
 617          _inputStreamReturnCharacter(&parser->input, ch);
 618          success = parseAttributeValue(parser, NULL);
 619      }
 620      if (str) {
 621          if (success) {
 622              _inputStreamGetCharactersFromMark(&parser->input, str);
 623          }
 624          _inputStreamClearMark(&parser->input);
 625      }
 626      return success;
 627  }
 628  
 629  /*
 630   [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
 631   [53] AttDef ::= S Name S AttType S DefaultDecl
 632  */
 633  static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
 634      const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
 635      CFXMLAttributeListDeclarationInfo attListData;
 636      CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
 637      CFIndex capacity = 8;
 638      UniChar ch;
 639      Boolean success = true;
 640      CFStringRef name;
 641      if (!_inputStreamMatchString(&parser->input, attList, 7) ||
 642          _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
 643          !_inputStreamScanXMLName(&parser->input, false, &name)) {
 644          _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 645          return false;
 646      }
 647      attListData.numberOfAttributes = 0;
 648      if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
 649          // Use this to mark that we don't need to collect attribute information to report to the client.  Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client.  -- REW, 2/9/2000
 650          attributes = NULL;
 651      }
 652      while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
 653          CFXMLAttributeDeclarationInfo *attribute = NULL;
 654          if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
 655              break;
 656          if (attributes) {
 657              if (capacity == attListData.numberOfAttributes) {
 658                  capacity = 2*capacity;
 659                  if (attributes != attributeArray) {
 660                      attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
 661                  } else {
 662                      attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
 663                  }
 664              }
 665              attribute = &(attributes[attListData.numberOfAttributes]);
 666              // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed.  -- REW, 2/9/2000
 667              attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
 668              attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
 669          }
 670          if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
 671              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 672              success = false;
 673              break;
 674          }
 675          if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
 676              success = false;
 677              break;
 678          }
 679          if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
 680              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 681              success = false;
 682              break;
 683          }
 684          if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
 685              success = false;
 686              break;
 687          }
 688          attListData.numberOfAttributes ++;
 689      }
 690      if (success) {
 691          if (!_inputStreamGetCharacter(&parser->input, &ch)) {
 692              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 693              success = false;
 694          } else if (ch != '>') {
 695              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 696              success = false;
 697          } else if (attributes) {
 698              CFStringRef tmp = parser->node->dataString;
 699              parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
 700              parser->node->dataString = name;
 701              attListData.attributes = attributes;
 702              parser->node->additionalData = (void *)&attListData;
 703              success = reportNewLeaf(parser);
 704              parser->node->additionalData = NULL;
 705              parser->node->dataString = tmp;
 706          }
 707      }
 708      if (attributes) {
 709          // Free up all that memory
 710          CFIndex idx;
 711          for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
 712              // Do not release attributeName here; it's a uniqued string from scanXMLName
 713              CFRelease(attributes[idx].typeString);
 714              CFRelease(attributes[idx].defaultString);
 715          }
 716          if (attributes != attributeArray) {
 717              CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
 718          }
 719      }
 720      return success;
 721  }
 722  
 723  CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
 724      Boolean success;
 725      if (extID) {
 726          CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
 727          if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
 728              success = true;
 729              extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
 730          } else {
 731              extID->systemID = NULL;
 732              success = false;
 733          }
 734          CFRelease(urlStr);
 735      } else {
 736          success = _inputStreamScanQuotedString(&parser->input, NULL);
 737      }
 738      return success;
 739  }
 740  
 741  /*
 742   [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
 743   [83] PublicID ::= 'PUBLIC' S PubidLiteral
 744   [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 745   [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 746   [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 
 747  */
 748  // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred.  -- REW, 2/2/2000
 749  static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
 750      const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
 751      const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
 752      Boolean success;
 753      if (extID) {
 754          extID->systemID = NULL;
 755          extID->publicID = NULL;
 756      }
 757      if (_inputStreamMatchString(&parser->input, publicString, 6)) {
 758          success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
 759          if (extID) {
 760              extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
 761              success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
 762          } else {
 763              success = success && _inputStreamScanQuotedString(&parser->input, NULL);
 764          }
 765          if (success) {
 766              UniChar ch;
 767              if (alsoAcceptPublicID) {
 768                  _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
 769              }
 770              if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
 771                  || !_inputStreamPeekCharacter(&parser->input, &ch)
 772                  || (ch != '\'' && ch != '\"')
 773                  || !parseSystemLiteral(parser, extID)) {
 774                  success = alsoAcceptPublicID;
 775                  if (alsoAcceptPublicID) {
 776                      _inputStreamBackUpToMark(&parser->input);
 777                  }
 778              } else {
 779                  success = true;
 780              }
 781              if (alsoAcceptPublicID) {
 782                  _inputStreamClearMark(&parser->input);
 783              }
 784          }
 785      } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
 786          success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
 787      } else {
 788          success = false;
 789      }
 790      return success;
 791  }
 792  
 793  /*
 794   [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
 795  */
 796  static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
 797      static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
 798      Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
 799      CFXMLNotationInfo notationData = {{NULL, NULL}};
 800      CFStringRef name;
 801      Boolean success =
 802          _inputStreamMatchString(&parser->input, notationString, 8) &&
 803          _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
 804          _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
 805          _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
 806          parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
 807  
 808      if (success) {
 809          UniChar ch;
 810          _inputStreamSkipWhitespace(&parser->input, NULL);
 811          success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
 812      }
 813      if (!success) {
 814          if (_inputStreamAtEOF(&parser->input)) {
 815              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 816          } else {
 817              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 818          }
 819      } else if (report) {
 820          CFStringRef tmp = parser->node->dataString;
 821          parser->node->dataTypeID = kCFXMLNodeTypeNotation;
 822          parser->node->dataString = name;
 823          parser->node->additionalData = &notationData;
 824          success = reportNewLeaf(parser);
 825          parser->node->additionalData = NULL;
 826          parser->node->dataString = tmp;
 827      }
 828      if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
 829      if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
 830      return success;
 831  }
 832  
 833  /*
 834   [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 835   [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 836   [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 837  */
 838  static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
 839      UniChar ch, separator;
 840      if (!pastParen) {
 841          if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
 842          _inputStreamSkipWhitespace(&parser->input, NULL);
 843      }
 844      if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
 845  
 846      /* Now scanning cp, production [48] */
 847      if (ch == '(') {
 848          if (!parseChoiceOrSequence(parser, false)) return false;
 849      } else {
 850          if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
 851      }
 852      if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
 853      if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
 854  
 855      /* Now past cp */
 856      _inputStreamSkipWhitespace(&parser->input, NULL);
 857      if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
 858      if (ch == ')') return true;
 859      if (ch != '|' && ch != ',') return false;
 860      separator = ch;
 861      while (ch == separator) {
 862          _inputStreamSkipWhitespace(&parser->input, NULL);
 863          if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
 864          if (ch != '(') {
 865              if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
 866          } else if (!parseChoiceOrSequence(parser, false)) {
 867              return false;
 868          }
 869          _inputStreamSkipWhitespace(&parser->input, NULL);
 870          if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
 871      }
 872      return ch == ')';
 873  }
 874  
 875  /*
 876   [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
 877  */
 878  static Boolean parseMixedElementContent(CFXMLParserRef parser) {
 879      static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
 880      UniChar ch;
 881      if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
 882      _inputStreamSkipWhitespace(&parser->input, NULL);
 883      if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
 884      if (ch == ')') return true;
 885  
 886      while (ch == '|') {
 887          _inputStreamSkipWhitespace(&parser->input, NULL);
 888          if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
 889          _inputStreamSkipWhitespace(&parser->input, NULL);
 890          if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
 891      }
 892      if (ch != ')') return false;
 893      if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
 894      return true;
 895  }
 896  
 897  /*
 898   [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
 899   [47] children ::= (choice | seq) ('?' | '*' | '+')?
 900   */
 901  static Boolean parseElementContentSpec(CFXMLParserRef parser) {
 902      static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
 903      static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
 904      UniChar ch;
 905      if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
 906          return true;
 907      } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
 908          return false;
 909      } else {
 910          // We want to know if we have a Mixed per production [51].  If we don't, we will need to back up and call the parseChoiceOrSequence function.  So we set the mark now.  -- REW, 2/10/2000
 911          _inputStreamGetCharacter(&parser->input, &ch);
 912          _inputStreamSkipWhitespace(&parser->input, NULL);
 913          if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
 914          if (ch == '#') {
 915              // Mixed
 916              return parseMixedElementContent(parser);
 917          } else {
 918              if (parseChoiceOrSequence(parser, true)) {
 919                  if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
 920                      _inputStreamGetCharacter(&parser->input, &ch);
 921                  }
 922                  return true;
 923              } else {
 924                  return false;
 925              }
 926          }
 927      }
 928  }
 929  
 930  /*
 931   [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
 932   */
 933  static Boolean parseElementDeclaration(CFXMLParserRef parser) {
 934      Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
 935      Boolean success;
 936      static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
 937      UniChar ch = '>';
 938      CFMutableStringRef contentDesc = NULL;
 939      CFStringRef name;
 940      success = _inputStreamMatchString(&parser->input, eltChars, 7)
 941          && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
 942          && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
 943          && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
 944      if (success) {
 945          if (report) _inputStreamSetMark(&parser->input);
 946          success = parseElementContentSpec(parser);
 947          if (success && report) {
 948              contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
 949              _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
 950          }
 951          if (report) _inputStreamClearMark(&parser->input);
 952          if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
 953          success = success && _inputStreamMatchString(&parser->input, &ch, 1);
 954      }
 955      if (!success) {
 956          if (_inputStreamAtEOF(&parser->input)) {
 957              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
 958          } else {
 959              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
 960          }
 961      } else if (report) {
 962          CFXMLElementTypeDeclarationInfo eltData;
 963          CFStringRef tmp = parser->node->dataString;
 964          parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
 965          parser->node->dataString = name;
 966          eltData.contentDescription = contentDesc;
 967          parser->node->additionalData = &eltData;
 968          success = reportNewLeaf(parser);
 969          parser->node->additionalData = NULL;
 970          parser->node->dataString = tmp;
 971      }
 972      if (contentDesc) CFRelease(contentDesc);
 973      return success;
 974  }
 975  
 976  /*
 977   [70] EntityDecl ::= GEDecl | PEDecl
 978   [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 979   [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 980   [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 981   [74] PEDef ::= EntityValue | ExternalID
 982   [76] NDataDecl ::= S 'NDATA' S Name
 983   [9]  EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |  "'" ([^%&'] | PEReference | Reference)* "'"
 984  */
 985  static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
 986      const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
 987      UniChar ch;
 988      Boolean isPEDecl = false;
 989      CFXMLEntityInfo entityData;
 990      CFStringRef name;
 991      Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
 992      Boolean success =
 993          _inputStreamMatchString(&parser->input, entityStr, 6) &&
 994          (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
 995          _inputStreamPeekCharacter(&parser->input, &ch);
 996  
 997      entityData.replacementText = NULL;
 998      entityData.entityID.publicID = NULL;
 999      entityData.entityID.systemID = NULL;
1000      entityData.notationName = NULL;
1001      // We will set entityType immediately before reporting
1002  
1003      if (success && ch == '%') {
1004          _inputStreamGetCharacter(&parser->input, &ch);
1005          success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1006          isPEDecl = true;
1007      }
1008      success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1009      if (success && (ch == '\"' || ch == '\'')) {
1010          // EntityValue
1011          // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1012          if (report) {
1013              entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1014              success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1015          } else {
1016              success = _inputStreamScanQuotedString(&parser->input, NULL);
1017          }
1018      } else if (success) {
1019          // ExternalID
1020          success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1021          if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1022              // There could be an option NDataDecl
1023              // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1024              const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1025              if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1026                  success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1027              }
1028          }
1029      }
1030      if (success) {
1031          _inputStreamSkipWhitespace(&parser->input, NULL);
1032          success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1033      }
1034      if (!success) {
1035          if (_inputStreamAtEOF(&parser->input)) {
1036              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1037          } else {
1038              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1039          }
1040      } else {
1041          CFStringRef tmp = parser->node->dataString;
1042          if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1043          else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1044          else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1045          else entityData.entityType = kCFXMLEntityTypeUnparsed;
1046          parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1047          parser->node->dataString = name;
1048          parser->node->additionalData = &entityData;
1049          success = reportNewLeaf(parser);
1050          parser->node->additionalData = NULL;
1051          parser->node->dataString = tmp;
1052          if (entityData.replacementText) CFRelease(entityData.replacementText);
1053      }
1054      if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1055      if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1056      return success;
1057  }
1058  
1059  /*
1060   [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1061   [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment 
1062  */ 
1063  // First character should be just past '['
1064  static Boolean parseInlineDTD(CFXMLParserRef parser) {
1065      Boolean success = true;
1066      while (success && !_inputStreamAtEOF(&parser->input)) {
1067          UniChar ch;
1068  
1069          parseWhitespace(parser);
1070          if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1071          if (ch == '%') {
1072              // PEReference
1073              success = parsePhysicalEntityReference(parser);
1074          } else if (ch == '<') {
1075              // markupdecl
1076              if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1077                  _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1078                  return false;
1079              }
1080              if (ch == '?') {
1081                  // Processing Instruction
1082                  success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1083              } else if (ch == '!') {
1084                  UniChar dashes[2] = {'-', '-'};
1085                  if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1086                      // Comment
1087                      success = parseComment(parser, true);
1088                  } else {
1089                      // elementdecl | AttListDecl | EntityDecl | NotationDecl
1090                      if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1091                          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1092                          return false;
1093                      } else if (ch == 'A') {
1094                          // AttListDecl
1095                          success = parseAttributeListDeclaration(parser);
1096                      } else if (ch == 'N') {
1097                          success = parseNotationDeclaration(parser);
1098                      } else if (ch == 'E') {
1099                          // elementdecl | EntityDecl
1100                          _inputStreamGetCharacter(&parser->input, &ch);
1101                          if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1102                              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1103                              return false;
1104                          }
1105                          _inputStreamReturnCharacter(&parser->input, 'E');
1106                          if (ch == 'L') {
1107                              success = parseElementDeclaration(parser);
1108                          } else if (ch == 'N') {
1109                              success = parseEntityDeclaration(parser);
1110                          } else {
1111                              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1112                              return false;
1113                          }
1114                      } else {
1115                          _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1116                          return false;
1117                      }                        
1118                  }
1119              } else {
1120                  _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1121                  return false;
1122              }
1123          } else if (ch == ']') {
1124              return true;
1125          } else {
1126              _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1127              return false;
1128          }
1129      }
1130      if (success) {
1131          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1132      }
1133      return false;
1134  }
1135  
1136  /*
1137  [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1138   */
1139  static Boolean parseTagContent(CFXMLParserRef parser) {
1140      while (!_inputStreamAtEOF(&parser->input)) {
1141          UniChar ch;
1142          CFIndex numWhitespaceCharacters;
1143          
1144          _inputStreamSetMark(&parser->input);
1145          numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1146          // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1147          if (!_inputStreamGetCharacter(&parser->input, &ch)) break;  // break == report unexpected EOF
1148  
1149          if (ch != '<' && ch != '&') { // CharData
1150              // Back off the whitespace; we'll report it with the PCData
1151              _inputStreamBackUpToMark(&parser->input);
1152              _inputStreamClearMark(&parser->input);
1153               if (!parsePCData(parser)) return false;
1154               if(_inputStreamComposingErrorOccurred(&parser->input)) {
1155                   _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1156                   return false;
1157               }
1158               continue;
1159          }
1160  
1161          // element | Reference | CDSect | PI | Comment
1162          // We can safely report any whitespace now
1163          if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1164              _inputStreamReturnCharacter(&parser->input, ch);
1165              _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1166              parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1167              parser->node->additionalData = NULL;
1168              if (!reportNewLeaf(parser)) return false;
1169              _inputStreamGetCharacter(&parser->input, &ch);
1170          }
1171          _inputStreamClearMark(&parser->input);
1172          
1173          if (ch == '&') {
1174              // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1175              if (!parseEntityReference(parser, true)) return false;
1176              continue;
1177          }
1178  
1179          // ch == '<'; element | CDSect | PI | Comment
1180          if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1181          if (ch == '?') { // PI
1182              _inputStreamGetCharacter(&parser->input, &ch);
1183              if (!parseProcessingInstruction(parser, true))
1184                  return false;
1185          } else if (ch == '/') { // end tag; we're passing outside of content's production
1186              _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1187              return true;
1188          } else if (ch != '!') { // element
1189              if (!parseTag(parser))  return false;
1190          } else {
1191              // Comment | CDSect
1192              UniChar dashes[3] = {'!', '-', '-'};
1193              if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1194                  // Comment
1195                  if (!parseComment(parser, true)) return false;
1196              } else {
1197                  // Should have a CDSect; back off the "<!" and call parseCDSect
1198                  _inputStreamReturnCharacter(&parser->input, '<');
1199                  if (!parseCDSect(parser)) return false;
1200              }
1201          }
1202      }
1203  
1204      if(_inputStreamComposingErrorOccurred(&parser->input)) {
1205          _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1206          return false;
1207      }
1208      // Only way to get here is if premature EOF was found
1209  //#warning CF:Include the tag name here
1210      _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1211      return false;
1212  }
1213  
1214  static Boolean parseCDSect(CFXMLParserRef parser) {
1215      const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1216      const UniChar _CDSectClose[3] = {']', ']', '>'};
1217      if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1218          _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1219          return false;
1220      }
1221      if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1222          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1223          return false;
1224      }
1225  
1226      parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1227      parser->node->additionalData = NULL;
1228      return reportNewLeaf(parser);
1229  }
1230  
1231  /*
1232   [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1233  */
1234  static Boolean validateCharacterReference(CFStringRef str) {
1235      Boolean isHex;
1236      CFIndex idx, len = CFStringGetLength(str);
1237      if (len < 2) return false;
1238      if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1239      if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1240          isHex = true;
1241          idx = 2;
1242          if (len == 2) return false;
1243      } else {
1244          isHex = false;
1245          idx = 1;
1246      }
1247  
1248      while (idx < len) {
1249          UniChar ch;
1250          ch = CFStringGetCharacterAtIndex(str, idx);
1251          idx ++;
1252          if (!(ch <= '9' && ch >= '0') &&
1253              !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1254              break;
1255          }
1256      }
1257      return (idx == len);
1258  }
1259  
1260  /*
1261   [67] Reference ::= EntityRef | CharRef
1262   [68] EntityRef ::= '&' Name ';'
1263  */
1264  static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1265      UniChar ch;
1266      CFXMLEntityReferenceInfo entData;
1267      CFStringRef name = NULL;
1268      if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1269          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1270          return false;
1271      }
1272      if (ch == '#') {
1273          ch = ';';
1274          if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1275              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1276              return false;
1277          } else if (!validateCharacterReference(parser->node->dataString)) {
1278              _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1279              return false;
1280          }
1281          entData.entityType = kCFXMLEntityTypeCharacter;
1282          name = parser->node->dataString;
1283      } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1284          if (_inputStreamAtEOF(&parser->input)) {
1285              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1286              return false;
1287          } else {
1288              _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1289              return false;
1290          }
1291      } else {
1292          entData.entityType = kCFXMLEntityTypeParsedInternal;
1293      }
1294      if (report) {
1295          CFStringRef tmp = parser->node->dataString;
1296          Boolean success;
1297          parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1298          parser->node->dataString = name;
1299          parser->node->additionalData = &entData;
1300          success = reportNewLeaf(parser);
1301          parser->node->additionalData = NULL;
1302          parser->node->dataString = tmp;
1303          return success;
1304      } else {
1305          return true;
1306      }
1307  }
1308  
1309  #if 0
1310  // Kept from old entity reference parsing....
1311  {
1312      switch (*(parser->curr)) {
1313          case 'l':  // "lt"
1314              if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1315                  ch = '<';
1316                  parser->curr += 3;
1317                  break;
1318              }
1319              parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1320              return;
1321          case 'g': // "gt"
1322              if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1323                  ch = '>';
1324                  parser->curr += 3;
1325                  break;
1326              }
1327              parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1328              return;
1329          case 'a': // "apos" or "amp"
1330              if (len < 4) {   // Not enough characters for either conversion
1331                  parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1332                  return;
1333              }
1334              if (*(parser->curr+1) == 'm') {
1335                  // "amp"
1336                  if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1337                      ch = '&';
1338                      parser->curr += 4;
1339                      break;
1340                  }
1341              } else if (*(parser->curr+1) == 'p') {
1342                  // "apos"
1343                  if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1344                      ch = '\'';
1345                      parser->curr += 5;
1346                      break;
1347                  }
1348              }
1349              parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1350              return;
1351          case 'q':  // "quote"
1352              if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1353                  ch = '\"';
1354                  parser->curr += 6;
1355                  break;
1356              }
1357              parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1358              return;
1359          case '#':
1360          {
1361              UniChar num = 0;
1362              Boolean isHex = false;
1363              if ( len < 4) {  // Not enough characters to make it all fit!  Need at least "&#d;"
1364                  parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1365                  return;
1366              }
1367              parser->curr ++;
1368              if (*(parser->curr) == 'x') {
1369                  isHex = true;
1370                  parser->curr ++;
1371              }
1372              while (parser->curr < parser->end) {
1373                  ch = *(parser->curr);
1374                  if (ch == ';') {
1375                      CFStringAppendCharacters(string, &num, 1);
1376                      parser->curr ++;
1377                      return;
1378                  }
1379                  if (!isHex) num = num*10;
1380                  else num = num << 4;
1381                  if (ch <= '9' && ch >= '0') {
1382                      num += (ch - '0');
1383                  } else if (!isHex) {
1384                      parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1385                      return;
1386                  } else if (ch >= 'a' && ch <= 'f') {
1387                      num += 10 + (ch - 'a');
1388                  } else if (ch >= 'A' && ch <= 'F') {
1389                      num += 10 + (ch - 'A');
1390                  } else {
1391                      parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1392                      return;                    
1393                  }
1394              }
1395              parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1396              return;
1397          }
1398          default:
1399              parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1400              return;
1401      }
1402      CFStringAppendCharacters(string, &ch, 1);
1403  }
1404  #endif
1405  
1406  /*
1407  [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1408  */
1409  static Boolean parsePCData(CFXMLParserRef parser) {
1410      UniChar ch;
1411      Boolean done = false;
1412      _inputStreamSetMark(&parser->input);
1413      while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1414          switch (ch) {
1415              case '<': 
1416              case '&':
1417                  _inputStreamReturnCharacter(&parser->input, ch);
1418                  done = true;
1419                  break;
1420              case ']':
1421              {
1422                  const UniChar endSequence[2] = {']', '>'};
1423                  if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1424                      _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1425                      _inputStreamClearMark(&parser->input);
1426                      return false;
1427                  }
1428                  break;
1429              }
1430              default:
1431                  ;
1432          }
1433      }
1434      _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1435      _inputStreamClearMark(&parser->input);
1436      parser->node->dataTypeID = kCFXMLNodeTypeText;
1437      parser->node->additionalData = NULL;
1438      return reportNewLeaf(parser);
1439  }
1440  
1441  /*
1442  [42] ETag ::= '</' Name S? '>'
1443   */
1444  static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1445      const UniChar beginEndTag[2] = {'<', '/'};
1446      Boolean unexpectedEOF = false, mismatch = false;
1447      CFStringRef closeTag;
1448  
1449      // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1450      if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1451          
1452          UniChar ch;
1453          _inputStreamSkipWhitespace(&parser->input, NULL);
1454          if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1455              unexpectedEOF = true;
1456          } else if (ch != '>') {
1457              mismatch = true;
1458          }
1459      } else if (_inputStreamAtEOF(&parser->input)) {
1460          unexpectedEOF = true;
1461      } else {
1462          mismatch = true;
1463      }
1464          
1465      if (unexpectedEOF || mismatch) {
1466          if (unexpectedEOF) {
1467              parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1468              parser->status = kCFXMLErrorUnexpectedEOF;
1469              if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1470          } else {
1471              parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1472              parser->status = kCFXMLErrorMalformedCloseTag;
1473              if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1474          }
1475          return false;
1476      }
1477      return true;
1478  }
1479  
1480  /*
1481   [39] element ::= EmptyElementTag | STag content ETag
1482   [40] STag ::= '<' Name (S Attribute)* S? '>'
1483   [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1484  */
1485  static Boolean parseTag(CFXMLParserRef parser) {
1486      UniChar ch;
1487      void *tag;
1488      CFXMLElementInfo data;
1489      Boolean success = true;
1490      CFStringRef tagName;
1491  
1492      if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1493          _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1494          return false;
1495      }
1496  
1497      _inputStreamSkipWhitespace(&parser->input, NULL);
1498      
1499      if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1500      data.attributes = parser->argDict;
1501      data.attributeOrder = parser->argArray;
1502      if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1503          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1504          return false;
1505      }
1506      if (ch == '/') {
1507          data.isEmpty = true;
1508          if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1509              _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1510              return false;
1511          }
1512      } else {
1513          data.isEmpty = false;
1514      }
1515      if (ch != '>') {
1516          _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1517          return false;
1518      }
1519  
1520      if (*parser->top || parser->top == parser->stack) {
1521          CFStringRef oldStr = parser->node->dataString;
1522  	parser->node->dataTypeID = kCFXMLNodeTypeElement;
1523          parser->node->dataString = tagName;
1524  	parser->node->additionalData = &data;
1525          tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1526          if (tag && parser->status == kCFXMLStatusParseInProgress) {
1527              INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1528          }
1529  	parser->node->additionalData = NULL;
1530          parser->node->dataString = oldStr;
1531          if (parser->status != kCFXMLStatusParseInProgress) {
1532              // callback called CFXMLParserAbort()
1533              _CFReportError(parser, parser->status, NULL);
1534              return false;
1535          }
1536      } else {
1537          tag = NULL;
1538      }
1539  
1540      pushXMLNode(parser, tag);
1541      if (!data.isEmpty) {
1542          success =  parseTagContent(parser);
1543          if (success) {
1544              success = parseCloseTag(parser, tagName);
1545          }
1546      }
1547      parser->top --;
1548  
1549      if (success && tag) {
1550          INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1551          if (parser->status != kCFXMLStatusParseInProgress) {
1552              _CFReportError(parser, parser->status, NULL);
1553              return false;
1554          }
1555      }
1556      return success;
1557  }
1558  
1559  /*
1560   [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'"
1561   [67] Reference ::= EntityRef | CharRef
1562   [68] EntityRef ::= '&' Name ';'
1563   */
1564  // For the moment, we don't worry about references in the attribute values.
1565  static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1566      UniChar quote, ch;
1567      Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1568      if (!success || (quote != '\'' && quote != '\"')) return false;
1569      if (str) _inputStreamSetMark(&parser->input);
1570      while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1571          switch (ch) {
1572              case '<': success = false; break;
1573              case '&':
1574                  if (!parseEntityReference(parser, false)) {
1575                      success = false;
1576                      break;
1577                  }
1578              default:
1579                  ;
1580          }
1581      }
1582      
1583      if (success && _inputStreamAtEOF(&parser->input)) {
1584          success = false;
1585      }
1586      if (str) {
1587          if (success) {
1588              _inputStreamReturnCharacter(&parser->input, quote);
1589              _inputStreamGetCharactersFromMark(&parser->input, str);
1590              _inputStreamGetCharacter(&parser->input, &ch);
1591          }
1592          _inputStreamClearMark(&parser->input);
1593      }
1594      return success;
1595  }
1596  
1597  /*
1598   [40] STag ::= '<' Name (S Attribute)* S? '>'
1599   [41] Attribute ::= Name Eq AttValue
1600   [25] Eq ::= S? '=' S?
1601  */
1602  
1603  // Expects parser->curr to be at the first content character; will consume the trailing whitespace.  
1604  Boolean parseAttributes(CFXMLParserRef parser) {
1605      UniChar ch;
1606      CFMutableDictionaryRef dict;
1607      CFMutableArrayRef array;
1608      Boolean failure = false;
1609      if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1610          if (parser->argDict) {
1611              CFDictionaryRemoveAllValues(parser->argDict);
1612              CFArrayRemoveAllValues(parser->argArray);
1613          }
1614          return true;  // No attributes; let caller deal with it
1615      }
1616      if (!parser->argDict) {
1617          parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1618          parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1619      } else {
1620          CFDictionaryRemoveAllValues(parser->argDict);
1621          CFArrayRemoveAllValues(parser->argArray);
1622      }
1623      dict = parser->argDict;
1624      array = parser->argArray; 
1625      while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1626          CFStringRef key;
1627          CFMutableStringRef value;
1628          if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1629              failure = true;
1630              break;
1631          }
1632          if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1633                  _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1634                  return false;
1635          }
1636          _inputStreamSkipWhitespace(&parser->input, NULL);
1637          if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') { 
1638              failure = true;
1639              break;
1640          }
1641          _inputStreamSkipWhitespace(&parser->input, NULL);
1642          value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1643          if (!parseAttributeValue(parser, value)) {
1644              CFRelease(value);
1645              failure = true;
1646              break;
1647          }
1648          CFArrayAppendValue(array, key);
1649          CFDictionarySetValue(dict, key, value);
1650          CFRelease(value);
1651          _inputStreamSkipWhitespace(&parser->input, NULL);
1652      }
1653      if (failure) {
1654  //#warning CF:Include tag name in this error report
1655          _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1656          return false;
1657      } else if (_inputStreamAtEOF(&parser->input)) {
1658          _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1659          return false;
1660      } else {
1661          return true;
1662      }
1663  }
1664  
1665  /*
1666   [1]  document ::= prolog element Misc*
1667   [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1668   [27] Misc ::= Comment | PI | S
1669   [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 
1670  
1671   We treat XMLDecl as a plain old PI, since PI is part of Misc.  This changes the prolog and document productions to
1672   [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1673   [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1674  
1675   NOTE: This function assumes parser->stack has a valid top.  I.e. the document pointer has already been created!
1676  */
1677  static Boolean parseXML(CFXMLParserRef parser) {
1678      Boolean success = true, sawDTD = false, sawElement = false;
1679      UniChar ch;
1680      while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1681          switch (ch) {
1682              case ' ':
1683              case '\n':
1684              case '\t':
1685              case '\r':
1686                  success = parseWhitespace(parser);
1687                  break;
1688              case '<':
1689                  _inputStreamGetCharacter(&parser->input, &ch);
1690                  if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1691                      _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1692                      return false;
1693                  }
1694                  if (ch == '!') {
1695                      // Comment or DTD
1696                      UniChar dashes[2] = {'-', '-'};
1697                      if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1698                          // Comment
1699                          success = parseComment(parser, true);
1700                      } else {
1701                          // Should be DTD
1702                          if (sawDTD) {
1703                              _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1704                              return false;
1705                          }
1706                          success = parseDTD(parser);
1707                          if (success) sawDTD = true;
1708                      }
1709                  } else if (ch == '?') {
1710                      // Processing instruction
1711                      success = parseProcessingInstruction(parser, true);
1712                  } else {
1713                      // Tag or malformed
1714                      if (sawElement) {
1715                          _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1716                          return false;
1717                      }
1718                      _inputStreamReturnCharacter(&parser->input, ch);
1719                      success = parseTag(parser);
1720                      if (success) sawElement = true;
1721                  }
1722                  break;
1723              default: {
1724                  parser->status = kCFXMLErrorMalformedDocument;
1725                  parser->errorString = ch < 256 ?
1726                      CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1727                      CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1728  
1729                  if (parser->callBacks.handleError) {
1730                      INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1731                  }
1732                  return false;
1733              }
1734          }
1735      }
1736      
1737      if (!success) return false;
1738      if (!sawElement) {
1739          _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1740          return false;
1741      }
1742      return true;
1743  }
1744  
1745  static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1746      if (str) {
1747          parser->status = errNum;
1748          parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1749      }
1750      if (parser->callBacks.handleError) {
1751          INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1752      }
1753  }
1754  
1755  // Assumes parser->node has been set and is ready to go
1756  static Boolean reportNewLeaf(CFXMLParserRef parser) {
1757      void *xmlStruct;
1758      if (*(parser->top) == NULL) return true;
1759  
1760      xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1761      if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1762          INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1763          if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1764      }
1765      if (parser->status != kCFXMLStatusParseInProgress) {
1766          _CFReportError(parser, parser->status, NULL);
1767          return false;
1768      }
1769      return true;
1770  }
1771  
1772  static void pushXMLNode(CFXMLParserRef parser, void *node) {
1773      parser->top ++;
1774      if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1775          parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1776          parser->top = parser->stack + parser->capacity;
1777          parser->capacity = 2*parser->capacity;
1778      }
1779      *(parser->top) = node;
1780  }
1781  
1782  /**************************/
1783  /* Parsing to a CFXMLTree */
1784  /**************************/
1785  
1786  static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1787      CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1788      CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1789      CFRelease(myNode);
1790      return (void *)tree;
1791  }
1792  
1793  static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1794      CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1795  }
1796  
1797  static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1798      CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1799      if (CFTreeGetParent(node))
1800          CFRelease((CFXMLTreeRef)xmlType);
1801  }
1802  
1803  CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1804      CFXMLParserRef parser;
1805      CFXMLParserCallBacks callbacks;
1806      CFXMLTreeRef result;
1807  
1808      CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1809  
1810      callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1811      callbacks.addChild = _XMLTreeAddChild;
1812      callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1813      callbacks.resolveExternalEntity = NULL;
1814      callbacks.handleError = NULL;
1815      parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1816  
1817      if (CFXMLParserParse(parser)) {
1818          result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1819      } else {
1820          result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1821          if (result) CFRelease(result);
1822          result = NULL;
1823      }
1824      CFRelease(parser);
1825      return result;
1826  }
1827  
1828  CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1829      return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1830  }
1831  
1832  CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1833  CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1834  CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1835  CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1836  
1837  CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1838      CFXMLParserRef parser;
1839      CFXMLParserCallBacks callbacks;
1840      CFXMLTreeRef result;
1841  
1842      __CFGenericValidateType(xmlData, CFDataGetTypeID());
1843      CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1844  
1845      callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1846      callbacks.addChild = _XMLTreeAddChild;
1847      callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1848      callbacks.resolveExternalEntity = NULL;
1849      callbacks.handleError = NULL;
1850      parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1851  
1852      if (CFXMLParserParse(parser)) {
1853          result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1854      } else {
1855          if (errorDict) {	// collect the error dictionary
1856              *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1857              if (*errorDict) {
1858                  CFIndex rawnum;
1859                  CFNumberRef cfnum;
1860                  CFStringRef errstring;
1861                  
1862                  rawnum = CFXMLParserGetLocation(parser);
1863                  cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1864                  if(cfnum) {
1865                      CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1866                      CFRelease(cfnum);                    
1867                  }
1868                  
1869                  rawnum = CFXMLParserGetLineNumber(parser);
1870                  cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1871                  if(cfnum) {
1872                      CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1873                      CFRelease(cfnum);                    
1874                  }
1875  
1876                  rawnum = CFXMLParserGetStatusCode(parser);
1877                  cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1878                  if(cfnum) {
1879                      CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1880                      CFRelease(cfnum);                    
1881                  }
1882  
1883                  errstring = CFXMLParserCopyErrorDescription(parser);
1884                  if(errstring) {
1885                      CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1886                      CFRelease(errstring);                    
1887                  }
1888              }
1889          }
1890          result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1891          if (result) CFRelease(result);
1892          result = NULL;
1893      }
1894      CFRelease(parser);
1895      return result;
1896  }
1897  
1898  /*
1899   At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1900   We should also be handling items that are up over certain values correctly.
1901   */
1902  CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1903      CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1904      CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1905      CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1906  
1907      CFStringInlineBuffer inlineBuf;
1908      CFIndex idx = 0;
1909      CFIndex mark = idx;
1910      CFIndex stringLength = CFStringGetLength(string);
1911      UniChar uc;
1912  
1913      CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1914  
1915      CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1916      for(idx = 0; idx < stringLength; idx++) {
1917          uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1918          if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1919              CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1920              CFStringAppend(newString, previousSubstring);
1921              CFRelease(previousSubstring);
1922              switch(uc) {
1923                  case '&':
1924                      CFStringAppend(newString, CFSTR("&amp;"));
1925                      break;
1926                  case '<':
1927                      CFStringAppend(newString, CFSTR("&lt;"));
1928                      break;
1929                  case '>':
1930                      CFStringAppend(newString, CFSTR("&gt;"));
1931                      break;
1932                  case '\'':
1933                      CFStringAppend(newString, CFSTR("&apos;"));
1934                      break;
1935                  case '"':
1936                      CFStringAppend(newString, CFSTR("&quot;"));
1937                      break;
1938              }
1939              mark = idx + 1;
1940          }
1941      }
1942      // Copy the remainder to the output string before returning.
1943      CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1944      if (NULL != remainder) {
1945          CFStringAppend(newString, remainder);
1946          CFRelease(remainder);
1947      }
1948      
1949      CFRelease(startChars);
1950      return newString;
1951  }
1952  
1953  CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1954      CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1955  
1956      CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1957      CFStringRef sub;
1958      CFIndex lastChunkStart, length = CFStringGetLength(string);
1959      CFIndex i, entityStart;
1960      UniChar uc;
1961      UInt32 entity;
1962      int base;
1963      CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1964  
1965      CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1966      CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1967      CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1968      CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1969      CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1970  
1971      CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1972      CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1973  
1974      lastChunkStart = 0;
1975      // Scan through the string in its entirety
1976      for(i = 0; i < length; ) {
1977          uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;	// grab the next character and move i.
1978  
1979          if(uc == '&') {
1980              entityStart = i - 1;
1981              entity = 0xFFFF;	// set this to a not-Unicode character as sentinel
1982                               // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1983              if(lastChunkStart < i - 1) {
1984                  sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1985                  CFStringAppend(newString, sub);
1986                  CFRelease(sub);
1987              }
1988  
1989              uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;	// grab the next character and move i.
1990                                                                             // Now we can process the entity reference itself
1991              if(uc == '#') {	// this is a numeric entity.
1992                  base = 10;
1993                  entity = 0;
1994                  uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1995  
1996                  if(uc == 'x') {	// only lowercase x allowed. Translating numeric entity as hexadecimal.
1997                      base = 16;
1998                      uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1999                  }
2000  
2001                  // process the provided digits 'til we're finished
2002                  while(true) {
2003                      if (uc >= '0' && uc <= '9')
2004                          entity = entity * base + (uc-'0');
2005                      else if (uc >= 'a' && uc <= 'f' && base == 16)
2006                          entity = entity * base + (uc-'a'+10);
2007                      else if (uc >= 'A' && uc <= 'F' && base == 16)
2008                          entity = entity * base + (uc-'A'+10);
2009                      else break;
2010  
2011                      if (i < length) {
2012                          uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2013                      }
2014                      else
2015                          break;
2016                  }
2017              }
2018  
2019              // Scan to the end of the entity
2020              while(uc != ';' && i < length) {
2021                  uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2022              }
2023  
2024              if(0xFFFF != entity) { // it was numeric, and translated.
2025                  // Now, output the result fo the entity
2026                  if(entity >= 0x10000) {
2027                      UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2028                      CFStringAppendCharacters(newString, characters, 2);
2029                  } else {
2030                      UniChar character = entity;
2031                      CFStringAppendCharacters(newString, &character, 1);
2032                  }
2033              } else {	// it wasn't numeric.
2034                  sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2035                  CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
2036                  if(replacementString) {
2037                      CFStringAppend(newString, replacementString);
2038                  } else {
2039                      CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2040                      sub =  CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2041                      CFStringAppend(newString, sub); // ...and append that.
2042                  }
2043                  CFRelease(sub); // in either case, release the most-recent "sub"
2044              }
2045  
2046              // move the lastChunkStart to the beginning of the next chunk.
2047              lastChunkStart = i;
2048          }
2049      }
2050      if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2051          sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2052          CFStringAppend(newString, sub);
2053          CFRelease(sub);
2054      }
2055  
2056      CFRelease(fullReplDict);
2057  
2058      return newString;
2059  }
2060  
2061  #pragma GCC diagnostic pop