/ CFStringUtilities.c
CFStringUtilities.c
  1  /*
  2   * Copyright (c) 2015 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. Please obtain a copy of the License at
 10   * http://www.opensource.apple.com/apsl/ and read it before using this
 11   * file.
 12   *
 13   * The Original Code and all software distributed under the License are
 14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 18   * Please see the License for the specific language governing rights and
 19   * limitations under the License.
 20   *
 21   * @APPLE_LICENSE_HEADER_END@
 22   */
 23  
 24  /*	CFStringUtilities.c
 25  	Copyright (c) 1999-2014, Apple Inc. All rights reserved.
 26  	Responsibility: Aki Inoue
 27  */
 28  
 29  #include "CFInternal.h"
 30  #include <CoreFoundation/CFStringEncodingConverterExt.h>
 31  #include <CoreFoundation/CFUniChar.h>
 32  #include <CoreFoundation/CFStringEncodingExt.h>
 33  #include "CFStringEncodingDatabase.h"
 34  #include "CFICUConverters.h"
 35  #include <limits.h>
 36  #include <stdlib.h>
 37  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 38  #include <unicode/ucol.h>
 39  #include <unicode/ucoleitr.h>
 40  #endif
 41  #include <string.h>
 42  
 43  #if  DEPLOYMENT_TARGET_WINDOWS
 44  #include <tchar.h>
 45  #endif
 46  
 47  
 48  Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
 49      switch (theEncoding) {
 50          case kCFStringEncodingASCII: // Built-in encodings
 51          case kCFStringEncodingMacRoman:
 52          case kCFStringEncodingUTF8:
 53          case kCFStringEncodingNonLossyASCII:
 54          case kCFStringEncodingWindowsLatin1:
 55          case kCFStringEncodingNextStepLatin:
 56          case kCFStringEncodingUTF16:
 57          case kCFStringEncodingUTF16BE:
 58          case kCFStringEncodingUTF16LE:
 59          case kCFStringEncodingUTF32:
 60          case kCFStringEncodingUTF32BE:
 61          case kCFStringEncodingUTF32LE:
 62              return true;
 63  
 64          default:
 65              return CFStringEncodingIsValidEncoding(theEncoding);
 66      }
 67  }
 68  
 69  const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
 70      return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
 71  }
 72  
 73  CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
 74      static CFMutableDictionaryRef mappingTable = NULL;
 75      static OSSpinLock mappingTableLock = OS_SPINLOCK_INIT;
 76  
 77      CFStringRef theName = NULL;
 78  
 79      if (mappingTable) {
 80          OSSpinLockLock(&mappingTableLock);
 81          theName = (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding);
 82          OSSpinLockUnlock(&mappingTableLock);
 83      }
 84  
 85      if (!theName) {
 86          const char *encodingName = __CFStringEncodingGetName(theEncoding);
 87          
 88          if (encodingName) {
 89              theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
 90          }
 91          
 92          if (theName) {
 93              OSSpinLockLock(&mappingTableLock);
 94  
 95              CFStringRef result = NULL;
 96              if (!mappingTable) {
 97                  mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
 98              } else {    // Check to see if this got in the dictionary in the meantime
 99                  result = (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding);
100              }
101              if (!result) {  // If not, add it in
102                  CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
103                  OSSpinLockUnlock(&mappingTableLock);
104                  CFRelease(theName);
105              } else {        // Otherwise use the one already in there
106                  OSSpinLockUnlock(&mappingTableLock);
107                  CFRelease(theName);
108                  theName = result;
109              }
110          }
111      }
112  
113      return theName;
114  }
115  
116  CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
117      CFStringEncoding encoding = kCFStringEncodingInvalidId;
118  #define BUFFER_SIZE (100)
119      char buffer[BUFFER_SIZE];
120      const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
121  
122      if (NULL == name) {
123          if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
124  
125          name = buffer;
126      }
127  
128      encoding = __CFStringEncodingGetFromCanonicalName(name);
129  
130  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
131      if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
132  #endif
133      
134  
135      // handling Java name variant for MS codepages
136      if ((kCFStringEncodingInvalidId == encoding) && !strncasecmp(name, "ms950", strlen("ms950"))) { // <rdar://problem/12903398> “MS950” is not recognized
137          encoding = __CFStringEncodingGetFromCanonicalName("cp950");
138      }
139      
140      return encoding;
141  }
142  
143  CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
144      CFStringRef name = NULL;
145      CFIndex value = encoding;
146      static CFMutableDictionaryRef mappingTable = NULL;
147      static CFLock_t lock = CFLockInit;
148  
149      __CFLock(&lock);
150      name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
151  
152      if (NULL == name) {
153  #define STACK_BUFFER_SIZE (100)
154          char buffer[STACK_BUFFER_SIZE];
155  
156          if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
157  
158  
159          if (NULL != name) {
160              CFIndex value = encoding;
161  
162              if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
163  
164              CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
165              CFRelease(name);
166          }
167      }
168      __CFUnlock(&lock);
169  
170      return name;
171  }
172  
173  #ifndef __OBJC__
/* Local mirror of the NSStringEncoding constants, used when this file is not
 * compiled as Objective-C. */
enum {
    NSASCIIStringEncoding         = 1,      /* 0..127 only */
    NSNEXTSTEPStringEncoding      = 2,
    NSJapaneseEUCStringEncoding   = 3,
    NSUTF8StringEncoding          = 4,
    NSISOLatin1StringEncoding     = 5,
    NSSymbolStringEncoding        = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding      = 8,
    NSISOLatin2StringEncoding     = 9,
    NSUnicodeStringEncoding       = 10,
    NSWindowsCP1251StringEncoding = 11,     /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,     /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,     /* Greek */
    NSWindowsCP1254StringEncoding = 14,     /* Turkish */
    NSWindowsCP1250StringEncoding = 15,     /* WinLatin2 */
    NSISO2022JPStringEncoding     = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding    = 30,

    NSProprietaryStringEncoding   = 65536   /* Installation-specific encoding */
};
195  #endif
196  
/*
 * Bit 31 marks an NSStringEncoding value that carries a raw CFStringEncoding
 * in its low 31 bits.
 *
 * The literal is unsigned on purpose. With a 32-bit int, (1 << 31) overflows
 * (undefined behaviour) and in practice yields a negative int. That value
 * sign-extends to 0xFFFFFFFF80000000 when combined with a 64-bit unsigned
 * long, so "value & NSENCODING_MASK" would match any value with high bits set.
 */
#define NSENCODING_MASK (1U << 31)
198  
199  unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
200      switch (theEncoding & 0xFFF) {
201          case kCFStringEncodingUnicode:
202              if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
203              else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
204              break;
205  
206          case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
207          case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
208  
209          case kCFStringEncodingASCII: return NSASCIIStringEncoding;
210  
211          case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
212          case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
213          case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
214          case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
215          case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
216          case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
217  
218          case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
219          case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
220          case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
221          case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
222          case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
223          case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
224      }
225  
226      return NSENCODING_MASK | theEncoding;
227  }
228  
229  CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
230      const uint16_t encodings[] = {
231          kCFStringEncodingASCII,
232          kCFStringEncodingNextStepLatin,
233          kCFStringEncodingEUC_JP,
234          0,
235          kCFStringEncodingISOLatin1,
236          kCFStringEncodingMacSymbol,
237          kCFStringEncodingNonLossyASCII,
238          kCFStringEncodingDOSJapanese,
239          kCFStringEncodingISOLatin2,
240          kCFStringEncodingUTF16,
241          kCFStringEncodingWindowsCyrillic,
242          kCFStringEncodingWindowsLatin1,
243          kCFStringEncodingWindowsGreek,
244          kCFStringEncodingWindowsLatin5,
245          kCFStringEncodingWindowsLatin2
246      };
247  
248      if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
249  
250      if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
251  
252      switch (theEncoding) {
253          case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
254          case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
255  
256          default:
257              return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
258      }
259  }
260  
261  UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
262      uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
263  
264      return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
265  }
266  
267  CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
268      return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
269  }
270  
271  CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
272      CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
273  
274  
275      return macEncoding;
276  }
277  
278  #define kCFStringCompareAllocationIncrement (128)
279  
280  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
281  
282  // -------------------------------------------------------------------------------------------------
283  //	CompareSpecials - ignore case & diacritic differences
284  //
285  //	Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
286  //	Fullwidth & halfwidth are in range FF00-FFEF
287  //	Parenthesized & circled are in range 3200-32FF
288  // -------------------------------------------------------------------------------------------------
289  
/* Constants used by __CompareSpecials to take apart collation element orders. */
enum {
    /* Tertiary-byte range treated as upper case, and the amount subtracted
     * to fold such a weight onto its lower-case counterpart. */
    kUpperCaseWeightMin   = 0x80 | 0x0F,
    kUpperCaseWeightMax   = 0x80 | 0x17,
    kUpperToLowerDelta    = 0x80 | 0x0A,    // 0x0A = 0x0F - 0x05

    /* Field masks applied to a 32-bit collation element order. */
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly      = 0xFFFF0000,
    kMaskSecondaryOnly    = 0x0000FF00,
    kMaskCaseTertiary     = 0x000000FF      // 2 hi bits case, 6 lo bits tertiary
};
299  
300  static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
301  	UErrorCode icuStatus = U_ZERO_ERROR;
302  	SInt32	orderWidth = 0;
303  	SInt32	orderCompos = 0;
304  
305  	UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
306  	UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
307  	if (U_SUCCESS(icuStatus)) {
308  		int32_t	startOffset1 = 0;
309  		int32_t	startOffset2 = 0;
310  		
311  		while (true) {
312  			int32_t	elemOrder1, elemOrder2;
313  			int32_t	offset1, offset2;
314  			
315  			elemOrder1 = ucol_next(collElems1, &icuStatus);
316  			elemOrder2 = ucol_next(collElems2, &icuStatus);
317  			if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
318  				break;
319  			}
320  
321  			offset1 = ucol_getOffset(collElems1);
322  			offset2 = ucol_getOffset(collElems2);
323  			if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
324  				if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
325  					// keys may differ in case, width, circling, etc.
326  
327  					int32_t	tertiary1 = (elemOrder1 & kMaskCaseTertiary);
328  					int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
329  					// fold upper to lower case
330  					if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
331  						tertiary1 -= kUpperToLowerDelta;
332  					}
333  					if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
334  						tertiary2 -= kUpperToLowerDelta;
335  					}
336  					// now compare
337  					if (tertiary1 != tertiary2) {
338  						orderWidth = (tertiary1 < tertiary2)? -1: 1;
339  						break;
340  					}
341  
342  				} else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
343  					// primary weights are both zero, but secondaries are not.
344  					if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
345  						// We have a code element which is a diacritic.
346  						// It may have come from a composed char or a combining char.
347  						// If it came from a combining char (longer element length) it sorts first.
348  						// This is only an approximation to what the Mac OS 9 code did, but this is an
349  						// unusual case anyway.
350  						int32_t	elem1Length = offset1 - startOffset1;
351  						int32_t	elem2Length = offset2 - startOffset2;
352  						if (elem1Length != elem2Length) {
353  							orderCompos = (elem1Length > elem2Length)? -1: 1;
354  						}
355  					}
356  				}
357  			}
358  			
359  			startOffset1 = offset1;
360  			startOffset2 = offset2;
361  		}
362  		ucol_closeElements(collElems1);
363  		ucol_closeElements(collElems2);
364  	}
365  	
366  	return (orderWidth != 0)? orderWidth: orderCompos;
367  }
368  
369  static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
370  	const UniChar *	text1P = text1Ptr;
371  	const UniChar *	text2P = text2Ptr;
372  	UInt32		textLimit = (text1Length <= text2Length)? text1Length: text2Length;
373  	UInt32		textCounter;
374  	SInt32		orderResult = 0;
375  
376  	// Loop through either string...the first difference differentiates this.
377  	for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
378  		text1P++;
379  		text2P++;
380  	}
381  	if (textCounter < textLimit) {
382  		// code point difference
383  		orderResult = (*text1P < *text2P) ? -1 : 1;
384  	} else if (text1Length != text2Length) {
385  		// one string has extra stuff at end
386  		orderResult = (text1Length < text2Length) ? -1 : 1;
387  	}
388  	return orderResult;
389  }
390  
391  
392  extern const CFStringRef __kCFLocaleCollatorID;
393  
394  static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
395      CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
396      char icuLocaleStr[128] = {0};
397      CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
398      UErrorCode icuStatus = U_ZERO_ERROR;
399      UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
400      ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
401      ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
402      ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
403      ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
404      ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
405      return collator;
406  }
407  
408  #define kCFMaxCachedDefaultCollators (8)
409  static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
410  static CFIndex __CFDefaultCollatorsCount = 0;
411  static const void *__CFDefaultCollatorLocale = NULL;
412  static CFLock_t __CFDefaultCollatorLock = CFLockInit;
413  
414  static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
415      CFLocaleRef currentLocale = NULL;
416      UCollator * collator = NULL;
417  
418      if (compareLocale != __CFDefaultCollatorLocale) {
419          currentLocale = CFLocaleCopyCurrent();
420          if (compareLocale != currentLocale) {
421  	    CFRelease(currentLocale);
422  	    return NULL;
423  	}
424      }
425  
426      __CFLock(&__CFDefaultCollatorLock);
427      if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
428          while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
429          __CFDefaultCollatorLocale = CFRetain(currentLocale);
430      }
431  
432      if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
433      __CFUnlock(&__CFDefaultCollatorLock);
434  
435      if (NULL == collator) {
436  	collator = __CFStringCreateCollator(compareLocale);
437      }
438  
439      if (NULL != currentLocale) CFRelease(currentLocale);
440  
441      return collator;
442  }
443  
444  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
445  static void __collatorFinalize(UCollator *collator) {
446      CFLocaleRef locale = _CFGetTSD(__CFTSDKeyCollatorLocale);
447      _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
448      _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);
449      __CFLock(&__CFDefaultCollatorLock);
450      if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
451          __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
452          collator = NULL;
453      }
454      __CFUnlock(&__CFDefaultCollatorLock);
455      if (NULL != collator) ucol_close(collator);
456      if (locale) CFRelease(locale);
457  }
458  #endif
459  
460  // -------------------------------------------------------------------------------------------------
461  // __CompareTextDefault
462  // 
463  // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
464  // A negative value indicates that text1 sorts before text2.
465  // -------------------------------------------------------------------------------------------------
466  static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
467  
468  	// collator must have default settings restored on exit from this function
469  
470  	*equivalentP = true;
471  	*orderP = 0;	
472  
473  	if (options & kCFCompareNumerically) {
474  	    UErrorCode icuStatus = U_ZERO_ERROR;	
475  	    ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
476  	}
477  
478  	// Most string differences are Primary. Do a primary check first, then if there
479  	// are no differences do a comparison with the options in the collator.
480  	UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
481  	if (icuResult != UCOL_EQUAL) {
482  		*orderP = (icuResult == UCOL_LESS) ? -2 : 2;
483  	}
484  	if (*orderP == 0) {
485  		UErrorCode icuStatus = U_ZERO_ERROR;	
486                  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
487                  ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
488                  ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
489  		if (!U_SUCCESS(icuStatus)) {
490  		    icuStatus = U_ZERO_ERROR;
491  		    ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
492  		    ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
493  		    ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
494  		    ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
495  		    return 666;
496  		}
497  
498  		// We don't have a primary difference. Recompare with standard collator.
499  		icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
500  		if (icuResult != UCOL_EQUAL) {
501  			*orderP = (icuResult == UCOL_LESS) ? -1 : 1;
502  		}
503  		icuStatus = U_ZERO_ERROR;
504                  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
505  		ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
506  		ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
507  	}
508  	if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
509  		*orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
510  	}
511  
512  	*equivalentP = (*orderP == 0);
513  
514  	// If strings are equivalent but we care about order and have not yet checked
515  	// to the level of code point order, then do some more checks for order
516  	if (*orderP == 0) {
517  		UErrorCode icuStatus = U_ZERO_ERROR;	
518  		// First try to see if ICU can find any differences above code point level
519                  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
520  		ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
521  		ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
522  		if (!U_SUCCESS(icuStatus)) {
523  		    icuStatus = U_ZERO_ERROR;
524  		    ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
525  		    ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
526  		    ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
527  		    ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
528  		    return 666;
529  		}
530  		icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
531  		if (icuResult != UCOL_EQUAL) {
532  			*orderP = (icuResult == UCOL_LESS) ? -1 : 1;
533  		} else {
534  			// no ICU differences above code point level, compare code points
535  			*orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
536  		}
537  		icuStatus = U_ZERO_ERROR;
538                  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
539  		ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
540  		ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
541  	}
542  
543  	if (options & kCFCompareNumerically) {
544  	    UErrorCode icuStatus = U_ZERO_ERROR;	
545  	    ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
546  	}
547  	return 0; // noErr
548  }
549  
550  #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
551  
552  static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
553      while (location > 0) {
554          UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
555          UTF32Char otherChar;
556          if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
557              ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
558              uint8_t planeNo = (ch >> 16);
559              if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
560              location -= 2;
561          } else {
562              if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
563              --location;
564          }
565      }
566  
567      return location;
568  }
569  
570  static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
571      do {
572          UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
573          UTF32Char otherChar;
574          if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
575              ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
576              location += 2;
577              uint8_t planeNo = (ch >> 16);
578              if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
579          } else {
580              ++location;
581              if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
582          }
583      } while (location < strMax);
584      return location;
585  }
586  
587  CF_PRIVATE CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
588      const UniChar *characters1;
589      const UniChar *characters2;
590      CFComparisonResult compResult = kCFCompareEqualTo;
591      CFRange range1 = str1Range;
592      CFRange range2 = str2Range;
593      SInt32 order;
594  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
595      Boolean isEqual;
596      bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
597  
598      UCollator *collator = NULL;
599      bool defaultCollator = true;
600  #endif
601      static const uint8_t *alnumBMP = NULL;
602      static const uint8_t *nonBaseBMP = NULL;
603      static const uint8_t *punctBMP = NULL;
604      static const uint8_t *controlBMP = NULL;
605      
606      if (NULL == alnumBMP) {
607  	alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
608  	nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
609  	punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
610  	controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
611      }
612      
613      // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
614      
615      range1.location = str1Range.location;
616      range2.location = str2Range.location;
617      
618      // go backward
619      // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
620      if (range1.location > 0) {
621  	range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
622      }
623      
624      if (range2.location > 0) {
625  	range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
626      }
627      
628  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
629  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
630      // First we try to use the last one used on this thread, if the locale is the same,
631      // otherwise we try to check out a default one, or then we create one.
632      UCollator *threadCollator = _CFGetTSD(__CFTSDKeyCollatorUCollator);
633      CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
634      if (compareLocale == threadLocale) {
635  	collator = threadCollator;
636      } else {
637  #endif
638  	collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
639  	defaultCollator = true;
640  	if (NULL == collator) {
641  	    collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
642  	    defaultCollator = false;
643  	}
644  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
645      }
646  #endif
647  #endif
648      
649      characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
650      characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
651  
652      if ((NULL != characters1) && (NULL != characters2)) { // do fast
653  	range1.length = (str1Range.location + str1Range.length) - range1.location;
654  	range2.length = (str2Range.location + str2Range.length) - range2.location;
655  
656  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
657          if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
658              compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
659          } else 
660  #endif
661          {
662              compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
663          }
664      } else {
665          UniChar *buffer1 = NULL;
666          UniChar *buffer2 = NULL;
667          UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
668          UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
669          CFIndex buffer1Len = 0, buffer2Len = 0;
670          CFIndex str1Max = str1Range.location + str1Range.length;
671          CFIndex str2Max = str2Range.location + str2Range.length;
672          CFIndex bufferSize;
673  
674          // Extend forward and compare until the result is deterministic. The result is indeterminate if the differences are weak and can be resolved by character folding. For example, a comparison between "abc" and "ABC" is considered to be indeterminate.
675          do {
676              if (str1Range.location < str1Max) {
677  		str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
678                  range1.length = (str1Range.location - range1.location);
679                  characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
680  
681                  if (NULL == characters1) {
682                      if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
683                          if (buffer1Len < range1.length) {
684                              bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
685                              if (0 == buffer1Len) {
686                                  buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
687                              } else if (buffer1Len < range1.length) {
688                                  buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
689                              }
690                              buffer1Len = bufferSize;
691                          }
692                      } else {
693                          buffer1 = sBuffer1;
694                      }
695  
696                      CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
697                      characters1 = buffer1;
698                  }
699              }
700  
701              if (str2Range.location < str2Max) {
702  		str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);                
703                  range2.length = (str2Range.location - range2.location);
704                  characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
705                  
706                  if (NULL == characters2) {
707                      if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
708                          if (buffer2Len < range2.length) {
709                              bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
710                              if (0 == buffer2Len) {
711                                  buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
712                              } else if (buffer2Len < range2.length) {
713                                  buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
714                              }
715                              buffer2Len = bufferSize;
716                          }
717                      } else {
718                          buffer2 = sBuffer2;
719                      }
720  
721                      CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
722                      characters2 = buffer2;
723                  }
724              }
725  
726  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
727              if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) ==  0 /* noErr */)) {
728                  if (isEqual) {
729                      if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
730                      order = 0;
731                  }
732              } else 
733  #endif
734              {
735                  order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
736                  if (0 == order) {
737                      if (range1.length < range2.length) {
738                          order = -2;
739                      } else if (range2.length < range1.length) {
740                          order = 2;
741                      }
742                  } else if (order < 0) {
743                      --order;
744                  } else if (order > 0) {
745                      ++order;
746                  }
747              }
748  
749              if ((order < -1) || (order > 1)) break; // the result is deterministic
750  
751              if (0 == order) {
752                  range1.location = str1Range.location;
753                  range2.location = str2Range.location;
754              }
755          } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
756  
757          if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
758  
759          if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
760          if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
761      }
762  
763  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
764      if (collator == threadCollator) {
765  	// do nothing, already cached
766      } else {
767  	if (threadLocale) __collatorFinalize((UCollator *)_CFGetTSD(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
768  
769  	_CFSetTSD(__CFTSDKeyCollatorUCollator, collator, (void *)__collatorFinalize);
770  	_CFSetTSD(__CFTSDKeyCollatorLocale, (void *)CFRetain(compareLocale), NULL);
771      }
772  #endif
773      
774      return compResult;
775  }
776