/ CFStringScanner.c
CFStringScanner.c
  1  /*
  2   * Copyright (c) 2015 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. Please obtain a copy of the License at
 10   * http://www.opensource.apple.com/apsl/ and read it before using this
 11   * file.
 12   *
 13   * The Original Code and all software distributed under the License are
 14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 18   * Please see the License for the specific language governing rights and
 19   * limitations under the License.
 20   *
 21   * @APPLE_LICENSE_HEADER_END@
 22   */
 23  
 24  /*	CFStringScanner.c
 25  	Copyright (c) 1999-2014, Apple Inc. All rights reserved.
 26  	Responsibility: Ali Ozer
 27  */
 28  
 29  #include "CFInternal.h"
 30  #include <CoreFoundation/CFString.h>
 31  #include <sys/types.h>
 32  #include <limits.h>
 33  #include <stdlib.h>
 34  #include <string.h>
 35  
 36  CF_INLINE Boolean __CFCharacterIsADigit(UniChar ch) {
 37      return (ch >= '0' && ch <= '9') ? true : false;
 38  }
 39  
 40  /* Returns -1 on illegal value */
 41  CF_INLINE SInt32 __CFCharacterNumericOrHexValue (UniChar ch) {
 42      if (ch >= '0' && ch <= '9') {
 43          return ch - '0';
 44      } else if (ch >= 'A' && ch <= 'F') {
 45          return ch + 10 - 'A';
 46      } else if (ch >= 'a' && ch <= 'f') {
 47          return ch + 10 - 'a';
 48      } else {
 49          return -1;
 50      }
 51  }
 52                 
 53  /* Returns -1 on illegal value */
 54  CF_INLINE SInt32 __CFCharacterNumericValue(UniChar ch) {
 55      return (ch >= '0' && ch <= '9') ? (ch - '0') : -1;
 56  }
 57  
 58  CF_INLINE UniChar __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(CFStringInlineBuffer *buf, SInt32 *indexPtr) {
 59      UniChar ch;
 60      while (__CFIsWhitespace(ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr))) (*indexPtr)++;
 61      return ch;
 62  }
 63  
 64  /* result is int64_t or int, depending on doLonglong
 65  */
 66  CF_PRIVATE Boolean __CFStringScanInteger(CFStringInlineBuffer *buf, CFTypeRef locale, SInt32 *indexPtr, Boolean doLonglong, void *result) {
 67      Boolean doingLonglong = false;	/* Set to true if doLonglong, and we overflow an int... */
 68      Boolean neg = false;
 69      int intResult = 0;
 70      register int64_t longlongResult = 0;	/* ??? int64_t is slow when not in regs; I hope this does the right thing. */
 71      UniChar ch;
 72  
 73      ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
 74  
 75      if (ch == '-' || ch == '+') {
 76  	neg = (ch == '-');
 77  	(*indexPtr)++;
 78      	ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
 79      }	
 80  
 81      if (! __CFCharacterIsADigit(ch)) return false;	/* No digits, bail out... */
 82      do {
 83  	if (doingLonglong) {
 84              if ((longlongResult >= LLONG_MAX / 10) && ((longlongResult > LLONG_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= LLONG_MAX - longlongResult * 10))) {
 85                  /* ??? This might not handle LLONG_MIN correctly... */
 86                  longlongResult = neg ? LLONG_MIN : LLONG_MAX;
 87                  neg = false;
 88                  while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr))));	/* Skip remaining digits */
 89              } else {
 90                  longlongResult = longlongResult * 10 + __CFCharacterNumericValue(ch);
 91                  ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
 92              }
 93  	} else {
 94              if ((intResult >= INT_MAX / 10) && ((intResult > INT_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= INT_MAX - intResult * 10))) {
 95                  // Overflow, check for int64_t...
 96                  if (doLonglong) {
 97                      longlongResult = intResult;
 98                      doingLonglong = true;
 99                  } else {
100                      /* ??? This might not handle INT_MIN correctly... */
101                      intResult = neg ? INT_MIN : INT_MAX;
102                      neg = false;
103                      while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr))));	/* Skip remaining digits */
104                  }
105              } else {
106                  intResult = intResult * 10 + __CFCharacterNumericValue(ch);
107                  ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
108              }
109  	}
110      } while (__CFCharacterIsADigit(ch));
111  
112      if (result) {
113          if (doLonglong) {
114  	    if (!doingLonglong) longlongResult = intResult;
115  	    *(int64_t *)result = neg ? -longlongResult : longlongResult;
116  	} else {
117  	    *(int *)result = neg ? -intResult : intResult;
118  	}
119      }
120  
121      return true;
122  }
123  
124  CF_PRIVATE Boolean __CFStringScanHex(CFStringInlineBuffer *buf, SInt32 *indexPtr, unsigned *result) {
125      UInt32 value = 0;
126      SInt32 curDigit;
127      UniChar ch;
128  
129      ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
130      /* Ignore the optional "0x" or "0X"; if it's followed by a non-hex, just parse the "0" and leave pointer at "x" */
131      if (ch == '0') {
132  	ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
133          if (ch == 'x' || ch == 'X') ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
134  	curDigit = __CFCharacterNumericOrHexValue(ch);
135          if (curDigit == -1) {
136  	    (*indexPtr)--;	/* Go back over the "x" or "X" */
137  	    if (result) *result = 0;
138              return true;	/* We just saw "0" */
139          }
140      } else {
141  	curDigit = __CFCharacterNumericOrHexValue(ch);
142          if (curDigit == -1) return false;
143      }    
144  
145      do {
146          if (value > (UINT_MAX >> 4)) {	
147  	    value = UINT_MAX;	/* We do this over and over again, but it's an error case anyway */
148          } else {
149              value = (value << 4) + curDigit;
150          }
151  	curDigit = __CFCharacterNumericOrHexValue(__CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr)));
152      } while (curDigit != -1);
153  
154      if (result) *result = value;
155      return true;
156  }
157  
// Bitmap over the 128 ASCII code points, one bit per character: character c is a
// member when bit (c & 7) of byte (c >> 3) is set. The members are the characters
// that can appear in the text of a floating-point number:
// '+', '-', '.', '0'-'9', 'E' and 'e'.
static const unsigned char __CFNumberSet[16] = {
    0x00, // 0x00-0x07: nul soh stx etx eot enq ack bel
    0x00, // 0x08-0x0F: bs  ht  nl  vt  np  cr  so  si
    0x00, // 0x10-0x17: dle dc1 dc2 dc3 dc4 nak syn etb
    0x00, // 0x18-0x1F: can em  sub esc fs  gs  rs  us
    0x00, // 0x20-0x27: sp  !   "   #   $   %   &   '
    0x68, // 0x28-0x2F: (   )   *   +   ,   -   .   /     members: + - .
    0xFF, // 0x30-0x37: 0   1   2   3   4   5   6   7     members: all
    0x03, // 0x38-0x3F: 8   9   :   ;   <   =   >   ?     members: 8 9
    0x20, // 0x40-0x47: @   A   B   C   D   E   F   G     members: E
    0x00, // 0x48-0x4F: H   I   J   K   L   M   N   O
    0x00, // 0x50-0x57: P   Q   R   S   T   U   V   W
    0x00, // 0x58-0x5F: X   Y   Z   [   \   ]   ^   _
    0x20, // 0x60-0x67: `   a   b   c   d   e   f   g     members: e
    0x00, // 0x68-0x6F: h   i   j   k   l   m   n   o
    0x00, // 0x70-0x77: p   q   r   s   t   u   v   w
    0x00  // 0x78-0x7F: x   y   z   {   |   }   ~   del
};
177  
178  CF_PRIVATE Boolean __CFStringScanDouble(CFStringInlineBuffer *buf, CFTypeRef locale, SInt32 *indexPtr, double *resultPtr) {
179      #define STACK_BUFFER_SIZE 256
180      #define ALLOC_CHUNK_SIZE 256 // first and subsequent malloc size.  Should be greater than STACK_BUFFER_SIZE
181      char localCharBuffer[STACK_BUFFER_SIZE];
182      char *charPtr = localCharBuffer;
183      char *endCharPtr;
184      SInt32 numChars = 0;
185      SInt32 capacity = STACK_BUFFER_SIZE;	// in chars
186      double result;
187      UniChar ch;
188      CFAllocatorRef tmpAlloc = NULL;
189  
190      ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
191      // At this point indexPtr points at the first non-space char
192  #if 0
193  #warning need to allow, case insensitively, all of: "nan", "inf", "-inf", "+inf", "-infinity", "+infinity", "infinity";
194  #warning -- strtod() will actually do most or all of that for us
195  #define BITSFORDOUBLENAN	((uint64_t)0x7ff8000000000000ULL)
196  #define BITSFORDOUBLEPOSINF	((uint64_t)0x7ff0000000000000ULL)
197  #define BITSFORDOUBLENEGINF	((uint64_t)0xfff0000000000000ULL)
198      if ('N' == ch || 'n' == ch) {	// check for "NaN", case insensitively
199          UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
200          UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
201          if (('a' == next1 || 'A' == next1) &&
202              ('N' == next2 || 'n' == next2)) {
203              *indexPtr += 3;
204              if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLENAN;
205              return true;
206          }
207      }
208      if ('I' == ch || 'i' == ch) {	// check for "Inf", case insensitively
209          UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
210          UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
211          if (('n' == next1 || 'N' == next1) &&
212              ('f' == next2 || 'F' == next2)) {
213              *indexPtr += 3;
214              if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLEPOSINF;
215              return true;
216          }
217      }
218      if ('+' == ch || '-' == ch) {	// check for "+/-Inf", case insensitively
219          UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
220          UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
221          UniChar next3 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 3);
222          if (('I' == next1 || 'i' == next1) &&
223              ('n' == next2 || 'N' == next2) &&
224              ('f' == next3 || 'F' == next3)) {
225              *indexPtr += 4;
226              if (resultPtr) *(uint64_t *)resultPtr = ('-' == ch) ? BITSFORDOUBLENEGINF : BITSFORDOUBLEPOSINF;
227              return true;
228          }
229      }
230  #endif // 0
231      // Get characters until one not in __CFNumberSet[] is encountered
232      while ((ch < 128) && (__CFNumberSet[ch >> 3] & (1 << (ch & 7)))) {
233          if (numChars >= capacity - 1) {
234  	    capacity += ALLOC_CHUNK_SIZE;
235  	    if (tmpAlloc == NULL) tmpAlloc = __CFGetDefaultAllocator();
236  	    if (charPtr == localCharBuffer) {
237  		charPtr = (char *)CFAllocatorAllocate(tmpAlloc, capacity * sizeof(char), 0);
238  		memmove(charPtr, localCharBuffer, numChars * sizeof(char));
239   	    } else {
240  		charPtr = (char *)CFAllocatorReallocate(tmpAlloc, charPtr, capacity * sizeof(char), 0);
241  	    }
242          }
243  	charPtr[numChars++] = (char)ch;
244  	ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + numChars);
245      };
246      charPtr[numChars] = 0;	// Null byte for strtod
247  
248      result = strtod_l(charPtr, &endCharPtr, NULL);
249  
250      if (tmpAlloc) CFAllocatorDeallocate(tmpAlloc, charPtr);
251      if (charPtr == endCharPtr) return false;
252      *indexPtr += (endCharPtr - charPtr);
253      if (resultPtr) *resultPtr = result; // only store result if we succeed
254      
255      return true;
256  }
257  
258  
259  #undef STACK_BUFFER_SIZE
260  #undef ALLOC_CHUNK_SIZE
261  
262