/ CFStringEncodingDatabase.c
CFStringEncodingDatabase.c
  1  /*
  2   * Copyright (c) 2015 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. Please obtain a copy of the License at
 10   * http://www.opensource.apple.com/apsl/ and read it before using this
 11   * file.
 12   *
 13   * The Original Code and all software distributed under the License are
 14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 18   * Please see the License for the specific language governing rights and
 19   * limitations under the License.
 20   *
 21   * @APPLE_LICENSE_HEADER_END@
 22   */
 23  
 24  /*	CFStringEncodingDatabase.c
 25  	Copyright (c) 2005-2014, Apple Inc. All rights reserved.
 26  	Responsibility: Aki Inoue
 27  */
 28  
 29  #include "CFInternal.h"
 30  #include <CoreFoundation/CFStringEncodingExt.h>
 31  #include "CFStringEncodingConverterPriv.h"
 32  #include "CFStringEncodingDatabase.h"
 33  #include <stdio.h>
 34  
 // Base of the Windows code page numbers used for the ISO 8859 family: the conversion
 // functions in this file map the encoding with low byte n to/from ISO8859CODEPAGE_BASE + n.
 35  #define ISO8859CODEPAGE_BASE (28590)
 36  
 // Encodings known to the lookup tables in this file.
 // __CFGetEncodingIndex() binary-searches this array (after masking the encoding with 0x0FFF),
 // so the entries must stay sorted in ascending numeric order.
 // The index of an entry is reused to index __CFWindowsCPList, __CFCanonicalNameList and
 // __CFOtherNameList, so all of these tables must be edited in lockstep.
 37  static const uint16_t __CFKnownEncodingList[] = {
         // Mac OS script encodings
 38      kCFStringEncodingMacRoman,
 39      kCFStringEncodingMacJapanese,
 40      kCFStringEncodingMacChineseTrad,
 41      kCFStringEncodingMacKorean,
 42      kCFStringEncodingMacArabic,
 43      kCFStringEncodingMacHebrew,
 44      kCFStringEncodingMacGreek,
 45      kCFStringEncodingMacCyrillic,
 46      kCFStringEncodingMacDevanagari,
 47      kCFStringEncodingMacGurmukhi,
 48      kCFStringEncodingMacGujarati,
 49      kCFStringEncodingMacOriya,
 50      kCFStringEncodingMacBengali,
 51      kCFStringEncodingMacTamil,
 52      kCFStringEncodingMacTelugu,
 53      kCFStringEncodingMacKannada,
 54      kCFStringEncodingMacMalayalam,
 55      kCFStringEncodingMacSinhalese,
 56      kCFStringEncodingMacBurmese,
 57      kCFStringEncodingMacKhmer,
 58      kCFStringEncodingMacThai,
 59      kCFStringEncodingMacLaotian,
 60      kCFStringEncodingMacGeorgian,
 61      kCFStringEncodingMacArmenian,
 62      kCFStringEncodingMacChineseSimp,
 63      kCFStringEncodingMacTibetan,
 64      kCFStringEncodingMacMongolian,
 65      kCFStringEncodingMacEthiopic,
 66      kCFStringEncodingMacCentralEurRoman,
 67      kCFStringEncodingMacVietnamese,
 68      kCFStringEncodingMacSymbol,
 69      kCFStringEncodingMacDingbats,
 70      kCFStringEncodingMacTurkish,
 71      kCFStringEncodingMacCroatian,
 72      kCFStringEncodingMacIcelandic,
 73      kCFStringEncodingMacRomanian,
 74      kCFStringEncodingMacCeltic,
 75      kCFStringEncodingMacGaelic,
 76      kCFStringEncodingMacFarsi,
 77      kCFStringEncodingMacUkrainian,
 78      kCFStringEncodingMacInuit,
 79
         // DOS code pages
 80      kCFStringEncodingDOSLatinUS,
 81      kCFStringEncodingDOSGreek,
 82      kCFStringEncodingDOSBalticRim,
 83      kCFStringEncodingDOSLatin1,
 84      kCFStringEncodingDOSGreek1,
 85      kCFStringEncodingDOSLatin2,
 86      kCFStringEncodingDOSCyrillic,
 87      kCFStringEncodingDOSTurkish,
 88      kCFStringEncodingDOSPortuguese,
 89      kCFStringEncodingDOSIcelandic,
 90      kCFStringEncodingDOSHebrew,
 91      kCFStringEncodingDOSCanadianFrench,
 92      kCFStringEncodingDOSArabic,
 93      kCFStringEncodingDOSNordic,
 94      kCFStringEncodingDOSRussian,
 95      kCFStringEncodingDOSGreek2,
 96      kCFStringEncodingDOSThai,
 97      kCFStringEncodingDOSJapanese,
 98      kCFStringEncodingDOSChineseSimplif,
 99      kCFStringEncodingDOSKorean,
100      kCFStringEncodingDOSChineseTrad,
101
         // Windows code pages, followed by plain ASCII
102      kCFStringEncodingWindowsLatin1,
103      kCFStringEncodingWindowsLatin2,
104      kCFStringEncodingWindowsCyrillic,
105      kCFStringEncodingWindowsGreek,
106      kCFStringEncodingWindowsLatin5,
107      kCFStringEncodingWindowsHebrew,
108      kCFStringEncodingWindowsArabic,
109      kCFStringEncodingWindowsBalticRim,
110      kCFStringEncodingWindowsVietnamese,
111      kCFStringEncodingWindowsKoreanJohab,
112      kCFStringEncodingASCII,
113
114      kCFStringEncodingShiftJIS_X0213,
115      kCFStringEncodingGB_18030_2000,
116
         // ISO 2022 and EUC families
117      kCFStringEncodingISO_2022_JP,
118      kCFStringEncodingISO_2022_JP_2,
119      kCFStringEncodingISO_2022_JP_1,
120      kCFStringEncodingISO_2022_JP_3,
121      kCFStringEncodingISO_2022_CN,
122      kCFStringEncodingISO_2022_CN_EXT,
123      kCFStringEncodingISO_2022_KR,
124      kCFStringEncodingEUC_JP,
125      kCFStringEncodingEUC_CN,
126      kCFStringEncodingEUC_TW,
127      kCFStringEncodingEUC_KR,
128
         // Miscellaneous encodings
129      kCFStringEncodingShiftJIS,
130
131      kCFStringEncodingKOI8_R,
132
133      kCFStringEncodingBig5,
134
135      kCFStringEncodingMacRomanLatin1,
136      kCFStringEncodingHZ_GB_2312,
137      kCFStringEncodingBig5_HKSCS_1999,
138      kCFStringEncodingVISCII,
139      kCFStringEncodingKOI8_U,
140      kCFStringEncodingBig5_E,
141      kCFStringEncodingUTF7_IMAP,
142
143      kCFStringEncodingNextStepLatin,
144
145      kCFStringEncodingEBCDIC_CP037
146  };
147  
148  // Windows code page mapping
// Entry i is the Windows code page for __CFKnownEncodingList[i]; the two arrays must have the
// same number of entries in the same order. A value of 0 means no code page is recorded:
// such entries are skipped when the reverse (code page -> encoding) table is built, and
// 0 is what __CFStringEncodingGetWindowsCodePage() returns for an unknown encoding.
149  static const uint16_t __CFWindowsCPList[] = {
         // Mac OS script encodings (kCFStringEncodingMacRoman ... kCFStringEncodingMacInuit)
150      10000,
151      10001,
152      10002,
153      10003,
154      10004,
155      10005,
156      10006,
157      10007,
158      0,
159      0,
160      0,
161      0,
162      0,
163      0,
164      0,
165      0,
166      0,
167      0,
168      0,
169      0,
170      10021,
171      0,
172      0,
173      0,
174      10008,
175      0,
176      0,
177      0,
178      10029,
179      0,
180      0,
181      0,
182      10081,
183      10082,
184      10079,
185      10010,
186      0,
187      0,
188      0,
189      10017,
190      0,
191
         // DOS code pages
192      437,
193      737,
194      775,
195      850,
196      851,
197      852,
198      855,
199      857,
200      860,
201      861,
202      862,
203      863,
204      864,
205      865,
206      866,
207      869,
208      874,
209      932,
210      936,
211      949,
212      950,
213
         // Windows code pages
214      1252,
215      1250,
216      1251,
217      1253,
218      1254,
219      1255,
220      1256,
221      1257,
222      1258,
223      1361,
224
         // kCFStringEncodingASCII
225      20127,
226
         // kCFStringEncodingShiftJIS_X0213, kCFStringEncodingGB_18030_2000
227      0,
228      54936,
229
         // ISO 2022 family
230      50221, // we prefer this over 50220 (the original note said "50220/50221"; presumably 50222 was meant) since that's what the CF converter generates
231      0,
232      0,
233      0,
234      50227,
235      0,
236      50225,
237
         // EUC family
238      51932,
239      51936,
240      51950,
241      51949,
242
         // kCFStringEncodingShiftJIS
243      0,
244
         // kCFStringEncodingKOI8_R
245      20866,
246
         // kCFStringEncodingBig5
247      0,
248
         // kCFStringEncodingMacRomanLatin1 ... kCFStringEncodingUTF7_IMAP
249      0,
250      52936,
251      0,
252      0,
253      21866,
254      0,
255      0,
256
         // kCFStringEncodingNextStepLatin
257      0,
258
         // kCFStringEncodingEBCDIC_CP037
259      37
260  };
261  
262  // Canonical name
// Entry i is the canonical name stored for __CFKnownEncodingList[i]; the two arrays must have
// the same number of entries in the same order. NULL means no name is stored here.
// The DOS and Windows ranges are all NULL because __CFStringEncodingGetCanonicalName() builds
// those names from the code page number ("cp%d" / "windows-%d").
// Names of Mac script encodings other than "macintosh", and "roman-latin1", are emitted with an
// "x-mac-" prefix by __CFStringEncodingGetCanonicalName().
263  static const char *__CFCanonicalNameList[] = {
         // Mac OS script encodings
264      "macintosh",
265      "japanese",
266      "trad-chinese",
267      "korean",
268      "arabic",
269      "hebrew",
270      "greek",
271      "cyrillic",
272      "devanagari",
273      "gurmukhi",
274      "gujarati",
275      "oriya",
276      "bengali",
277      "tamil",
278      "telugu",
279      "kannada",
280      "malayalam",
281      "sinhalese",
282      "burmese",
283      "khmer",
284      "thai",
285      "laotian",
286      "georgian",
287      "armenian",
288      "simp-chinese",
289      "tibetan",
290      "mongolian",
291      "ethiopic",
292      "centraleurroman",
293      "vietnamese",
294      "symbol",
295      "dingbats",
296      "turkish",
297      "croatian",
298      "icelandic",
299      "romanian",
300      "celtic",
301      "gaelic",
302      "farsi",
303      "ukrainian",
304      "inuit",
305
         // DOS code pages (names are synthesized from the code page number)
306      NULL,
307      NULL,
308      NULL,
309      NULL,
310      NULL,
311      NULL,
312      NULL,
313      NULL,
314      NULL,
315      NULL,
316      NULL,
317      NULL,
318      NULL,
319      NULL,
320      NULL,
321      NULL,
322      NULL,
323      NULL,
324      NULL,
325      NULL,
326      NULL,
327
         // Windows code pages (names are synthesized from the code page number)
328      NULL,
329      NULL,
330      NULL,
331      NULL,
332      NULL,
333      NULL,
334      NULL,
335      NULL,
336      NULL,
337      NULL,
338
         // kCFStringEncodingASCII
339      "us-ascii",
340
         // kCFStringEncodingShiftJIS_X0213, kCFStringEncodingGB_18030_2000
341      NULL,
342      "gb18030",
343
         // ISO 2022 and EUC families
344      "iso-2022-jp",
345      "iso-2022-jp-2",
346      "iso-2022-jp-1",
347      "iso-2022-jp-3",
348      "iso-2022-cn",
349      "iso-2022-cn-ext",
350      "iso-2022-kr",
351      "euc-jp",
352      "gb2312",
353      "euc-tw",
354      "euc-kr",
355
         // Miscellaneous encodings
356      "shift_jis",
357
358      "koi8-r",
359
360      "big5",
361
362      "roman-latin1",
363      "hz-gb-2312",
364      "big5-hkscs",
365      "viscii",
366      "koi8-u",
367      NULL,
368      "utf7-imap",
369
370      "x-nextstep",
371
372      "ibm037",
373  };
374  
375  static inline CFIndex __CFGetEncodingIndex(CFStringEncoding encoding) {
376      const uint16_t *head = __CFKnownEncodingList;
377      const uint16_t *tail = head + ((sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList)) - 1);
378      const uint16_t *middle;
379  
380      encoding &= 0x0FFF;
381      while (head <= tail) {
382          middle = head + ((tail - head) >> 1);
383  
384          if (encoding == *middle) {
385              return middle - __CFKnownEncodingList;
386          } else if (encoding < *middle) {
387              tail = middle - 1;
388          } else {
389              head = middle + 1;
390          }
391      }
392  
393      return kCFNotFound;
394  }
395  
396  CF_PRIVATE uint16_t __CFStringEncodingGetWindowsCodePage(CFStringEncoding encoding) {
397      CFStringEncoding encodingBase = encoding & 0x0F00;
398  
399      if (0x0100 == encodingBase) { // UTF
400          switch (encoding) {
401              case kCFStringEncodingUTF7: return 65000;
402              case kCFStringEncodingUTF8: return 65001;
403              case kCFStringEncodingUTF16: return 1200;
404              case kCFStringEncodingUTF16BE: return 1201;
405              case kCFStringEncodingUTF32: return 65005;
406              case kCFStringEncodingUTF32BE: return 65006;
407          }        
408      } else if (0x0200 == encodingBase) { // ISO 8859 range
409          return ISO8859CODEPAGE_BASE + (encoding & 0xFF);
410      } else { // others
411          CFIndex index = __CFGetEncodingIndex(encoding);
412  
413          if (kCFNotFound != index) return __CFWindowsCPList[index];
414      }
415  
416      return 0;
417  }
418  
419  CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromWindowsCodePage(uint16_t codepage) {
420      switch (codepage) {
421          case 65001: return kCFStringEncodingUTF8;
422          case 1200: return kCFStringEncodingUTF16;
423          case 0: return kCFStringEncodingInvalidId;
424          case 1201: return kCFStringEncodingUTF16BE;
425          case 65005: return kCFStringEncodingUTF32;
426          case 65006: return kCFStringEncodingUTF32BE;
427          case 65000: return kCFStringEncodingUTF7;
428      }
429  
430      if ((codepage > ISO8859CODEPAGE_BASE) && (codepage <= (ISO8859CODEPAGE_BASE + 16))) {
431          return (codepage - ISO8859CODEPAGE_BASE) + 0x0200;
432      } else {
433          static CFMutableDictionaryRef mappingTable = NULL;
434          static CFLock_t lock = CFLockInit;
435          uintptr_t value;
436  
437          __CFLock(&lock);
438          if (NULL == mappingTable) {
439              CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
440              
441              mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
442  
443              for (index = 0;index < count;index++) {
444                  if (0 != __CFWindowsCPList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFWindowsCPList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
445              }
446          }
447          __CFUnlock(&lock);
448  
449          if (CFDictionaryGetValueIfPresent(mappingTable, (const void *)(uintptr_t)codepage, (const void **)&value)) return (CFStringEncoding)value;
450      }
451  
452  
453      return kCFStringEncodingInvalidId;
454  }
455  
456  CF_PRIVATE bool __CFStringEncodingGetCanonicalName(CFStringEncoding encoding, char *buffer, CFIndex bufferSize) {
457      const char *format = "%s";
458      const char *name = NULL;
459      uint32_t value = 0;
460      CFIndex index;
461  
462      switch (encoding & 0x0F00) {
463          case 0x0100: // UTF range
464              switch (encoding) {
465                  case kCFStringEncodingUTF7: name = "utf-7"; break;
466                  case kCFStringEncodingUTF8: name = "utf-8"; break;
467                  case kCFStringEncodingUTF16: name = "utf-16"; break;
468                  case kCFStringEncodingUTF16BE: name = "utf-16be"; break;
469                  case kCFStringEncodingUTF16LE: name = "utf-16le"; break;
470                  case kCFStringEncodingUTF32: name = "utf-32"; break;
471                  case kCFStringEncodingUTF32BE: name = "utf-32be"; break;
472                  case kCFStringEncodingUTF32LE: name = "utf-32le"; break;
473              }
474              break;
475  
476          case 0x0200: // ISO 8859 range
477              format = "iso-8859-%d";
478              value = (encoding & 0xFF);
479              break;
480  
481          case 0x0400: // DOS code page range
482          case 0x0500: // Windows code page range
483              index = __CFGetEncodingIndex(encoding);
484              
485              if (kCFNotFound != index) {
486                  value = __CFWindowsCPList[index];
487                  if (0 != value) format = ((0x0400 == (encoding & 0x0F00)) ? "cp%d" : "windows-%d");
488              }
489              break;
490  
491          default: // others
492              index = __CFGetEncodingIndex(encoding);
493  
494              if (kCFNotFound != index) {
495                  if (((0 == (encoding & 0x0F00)) && (kCFStringEncodingMacRoman != encoding)) || (kCFStringEncodingMacRomanLatin1 == encoding)) format = "x-mac-%s";
496                  name = (const char *)__CFCanonicalNameList[index];
497              }
498              break;
499      }
500  
501      if ((0 == value) && (NULL == name)) {
502          return false;
503      } else if (0 != value) {
504          return ((snprintf(buffer, bufferSize, format, value) < bufferSize) ? true : false);
505      } else {
506          return ((snprintf(buffer, bufferSize, format, name) < bufferSize) ? true : false);
507      }
508  }
509  
510  #define LENGTH_LIMIT (256)
511  static Boolean __CFCanonicalNameCompare(const void *value1, const void *value2) { return ((0 == strncasecmp_l((const char *)value1, (const char *)value2, LENGTH_LIMIT, NULL)) ? true : false); }
512  
513  static CFHashCode __CFCanonicalNameHash(const void *value) {
514      const char *name = (const char *)value;
515      CFHashCode code = 0;
516  
517      while ((0 != *name) && ((name - (const char *)value) < LENGTH_LIMIT)) {
518          char character = *(name++);
519  
520          code += (character + (((character >= 'A') && (character <= 'Z')) ? 'a' - 'A' : 0));
521      }
522  
523      return code * (name - (const char *)value);
524  }
525  
526  CF_PRIVATE CFStringEncoding __CFStringEncodingGetFromCanonicalName(const char *canonicalName) {
527      CFStringEncoding encoding;
528      CFIndex prefixLength;
529      static CFMutableDictionaryRef mappingTable = NULL;
530      static CFLock_t lock = CFLockInit;
531  
532      prefixLength = strlen("iso-8859-");
533      if (0 == strncasecmp_l(canonicalName, "iso-8859-", prefixLength, NULL)) {// do ISO
534          encoding = strtol(canonicalName + prefixLength, NULL, 10);
535  
536          return (((0 == encoding) || (encoding > 16)) ? kCFStringEncodingInvalidId : encoding + 0x0200);
537      }
538  
539      prefixLength = strlen("cp");
540      if (0 == strncasecmp_l(canonicalName, "cp", prefixLength, NULL)) {// do DOS
541          encoding = strtol(canonicalName + prefixLength, NULL, 10);
542  
543          return __CFStringEncodingGetFromWindowsCodePage(encoding);
544      }
545  
546      prefixLength = strlen("windows-");
547      if (0 == strncasecmp_l(canonicalName, "windows-", prefixLength, NULL)) {// do DOS
548          encoding = strtol(canonicalName + prefixLength, NULL, 10);
549          
550          return __CFStringEncodingGetFromWindowsCodePage(encoding);
551      }
552      
553      __CFLock(&lock);
554      if (NULL == mappingTable) {
555          CFIndex index, count = sizeof(__CFKnownEncodingList) / sizeof(*__CFKnownEncodingList);
556  
557          CFDictionaryKeyCallBacks keys = {
558              0, NULL, NULL, NULL, &__CFCanonicalNameCompare, &__CFCanonicalNameHash
559          };
560  
561          mappingTable = CFDictionaryCreateMutable(NULL, 0, &keys, NULL);
562  
563          // Add UTFs
564          CFDictionarySetValue(mappingTable, "utf-7", (const void *)kCFStringEncodingUTF7);
565          CFDictionarySetValue(mappingTable, "utf-8", (const void *)kCFStringEncodingUTF8);
566          CFDictionarySetValue(mappingTable, "utf-16", (const void *)kCFStringEncodingUTF16);
567          CFDictionarySetValue(mappingTable, "utf-16be", (const void *)kCFStringEncodingUTF16BE);
568          CFDictionarySetValue(mappingTable, "utf-16le", (const void *)kCFStringEncodingUTF16LE);
569          CFDictionarySetValue(mappingTable, "utf-32", (const void *)kCFStringEncodingUTF32);
570          CFDictionarySetValue(mappingTable, "utf-32be", (const void *)kCFStringEncodingUTF32BE);
571          CFDictionarySetValue(mappingTable, "utf-32le", (const void *)kCFStringEncodingUTF32LE);
572  
573          for (index = 0;index < count;index++) {
574              if (NULL != __CFCanonicalNameList[index]) CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)__CFCanonicalNameList[index], (const void *)(uintptr_t)__CFKnownEncodingList[index]);
575          }
576      }
577      __CFUnlock(&lock);
578  
579      if (0 == strncasecmp_l(canonicalName, "macintosh", sizeof("macintosh") - 1, NULL)) return kCFStringEncodingMacRoman;
580  
581      
582      prefixLength = strlen("x-mac-");
583      encoding = (CFStringEncoding)(CFIndex)CFDictionaryGetValue(mappingTable, canonicalName + ((0 == strncasecmp_l(canonicalName, "x-mac-", prefixLength, NULL)) ? prefixLength : 0));
584  
585      return ((0 == encoding) ? kCFStringEncodingInvalidId : encoding);
586  }
587  #undef LENGTH_LIMIT
588  
589  #if DEPLOYMENT_TARGET_MACOSX
590  // Most compatible Mac script for each ISO 8859 encoding, indexed by (low byte of the encoding - 1).
// (__CFOtherSimilarScriptList is the list that is indexed from the DOS range.)
591  static uint16_t __CFISO8859SimilarScriptList[] = {
592      kCFStringEncodingMacRoman,
593      kCFStringEncodingMacCentralEurRoman,
594      kCFStringEncodingMacRoman,
595      kCFStringEncodingMacCentralEurRoman,
596      kCFStringEncodingMacCyrillic,
597      kCFStringEncodingMacArabic,
598      kCFStringEncodingMacGreek,
599      kCFStringEncodingMacHebrew,
600      kCFStringEncodingMacTurkish,
601      kCFStringEncodingMacInuit,
602      kCFStringEncodingMacThai,
603      kCFStringEncodingMacRoman,
604      kCFStringEncodingMacCentralEurRoman,
605      kCFStringEncodingMacCeltic,
606      kCFStringEncodingMacRoman,
607      kCFStringEncodingMacRomanian};
608  
// Most compatible Mac script for the entries of __CFKnownEncodingList from
// kCFStringEncodingDOSLatinUS onward. __CFStringEncodingGetMostCompatibleMacScript() indexes
// it with (index in __CFKnownEncodingList - index of kCFStringEncodingDOSLatinUS), so the
// entries must stay in the same order as that part of __CFKnownEncodingList.
609  static uint16_t __CFOtherSimilarScriptList[] = {
         // DOS code pages
610      kCFStringEncodingMacRoman,
611      kCFStringEncodingMacGreek,
612      kCFStringEncodingMacCentralEurRoman,
613      kCFStringEncodingMacRoman,
614      kCFStringEncodingMacGreek,
615      kCFStringEncodingMacCentralEurRoman,
616      kCFStringEncodingMacCyrillic,
617      kCFStringEncodingMacTurkish,
618      kCFStringEncodingMacRoman,
619      kCFStringEncodingMacIcelandic,
620      kCFStringEncodingMacHebrew,
621      kCFStringEncodingMacRoman,
622      kCFStringEncodingMacArabic,
623      kCFStringEncodingMacInuit,
624      kCFStringEncodingMacCyrillic,
625      kCFStringEncodingMacGreek,
626      kCFStringEncodingMacThai,
627      kCFStringEncodingMacJapanese,
628      kCFStringEncodingMacChineseSimp,
629      kCFStringEncodingMacKorean,
630      kCFStringEncodingMacChineseTrad,
631
         // Windows code pages
632      kCFStringEncodingMacRoman,
633      kCFStringEncodingMacCentralEurRoman,
634      kCFStringEncodingMacCyrillic,
635      kCFStringEncodingMacGreek,
636      kCFStringEncodingMacTurkish,
637      kCFStringEncodingMacHebrew,
638      kCFStringEncodingMacArabic,
639      kCFStringEncodingMacCentralEurRoman,
640      kCFStringEncodingMacVietnamese,
641      kCFStringEncodingMacKorean,
642
         // kCFStringEncodingASCII
643      kCFStringEncodingMacRoman,
644
         // kCFStringEncodingShiftJIS_X0213, kCFStringEncodingGB_18030_2000
645      kCFStringEncodingMacJapanese,
646      kCFStringEncodingMacChineseSimp,
647
         // ISO 2022 and EUC families
648      kCFStringEncodingMacJapanese,
649      kCFStringEncodingMacJapanese,
650      kCFStringEncodingMacJapanese,
651      kCFStringEncodingMacJapanese,
652      kCFStringEncodingMacChineseSimp,
653      kCFStringEncodingMacChineseSimp,
654      kCFStringEncodingMacKorean,
655      kCFStringEncodingMacJapanese,
656      kCFStringEncodingMacChineseSimp,
657      kCFStringEncodingMacChineseTrad,
658      kCFStringEncodingMacKorean,
659
         // kCFStringEncodingShiftJIS
660      kCFStringEncodingMacJapanese,
661
         // kCFStringEncodingKOI8_R
662      kCFStringEncodingMacCyrillic,
663
         // kCFStringEncodingBig5
664      kCFStringEncodingMacChineseTrad,
665
         // kCFStringEncodingMacRomanLatin1 ... kCFStringEncodingUTF7_IMAP
666      kCFStringEncodingMacRoman,
667      kCFStringEncodingMacChineseSimp,
668      kCFStringEncodingMacChineseTrad,
669      kCFStringEncodingMacVietnamese,
670      kCFStringEncodingMacUkrainian,
671      kCFStringEncodingMacChineseTrad,
672      kCFStringEncodingMacRoman,
673
         // kCFStringEncodingNextStepLatin
674      kCFStringEncodingMacRoman,
675
         // kCFStringEncodingEBCDIC_CP037
676      kCFStringEncodingMacRoman
677  };
678  
// Display names for the ISO 8859 encodings, indexed by (low byte of the encoding - 1).
// A NULL entry means no display name is available for that slot.
679  static const char *__CFISONameList[] = {
680      "Western (ISO Latin 1)",
681      "Central European (ISO Latin 2)",
682      "Western (ISO Latin 3)",
683      "Central European (ISO Latin 4)",
684      "Cyrillic (ISO 8859-5)",
685      "Arabic (ISO 8859-6)",
686      "Greek (ISO 8859-7)",
687      "Hebrew (ISO 8859-8)",
688      "Turkish (ISO Latin 5)",
689      "Nordic (ISO Latin 6)",
690      "Thai (ISO 8859-11)",
691      NULL, // slot for low byte 12: no name
692      "Baltic (ISO Latin 7)",
693      "Celtic (ISO Latin 8)",
694      "Western (ISO Latin 9)",
695      "Romanian (ISO Latin 10)",
696  };
697  
// Display names for the entries of __CFKnownEncodingList; entry i names
// __CFKnownEncodingList[i], so both arrays must have the same length and order.
// A NULL entry means no display name is available for that encoding.
698  static const char *__CFOtherNameList[] = {
         // Mac OS script encodings
699      "Western (Mac OS Roman)",
700      "Japanese (Mac OS)",
701      "Traditional Chinese (Mac OS)",
702      "Korean (Mac OS)",
703      "Arabic (Mac OS)",
704      "Hebrew (Mac OS)",
705      "Greek (Mac OS)",
706      "Cyrillic (Mac OS)",
707      "Devanagari (Mac OS)",
708      "Gurmukhi (Mac OS)",
709      "Gujarati (Mac OS)",
710      "Oriya (Mac OS)",
711      "Bengali (Mac OS)",
712      "Tamil (Mac OS)",
713      "Telugu (Mac OS)",
714      "Kannada (Mac OS)",
715      "Malayalam (Mac OS)",
716      "Sinhalese (Mac OS)",
717      "Burmese (Mac OS)",
718      "Khmer (Mac OS)",
719      "Thai (Mac OS)",
720      "Laotian (Mac OS)",
721      "Georgian (Mac OS)",
722      "Armenian (Mac OS)",
723      "Simplified Chinese (Mac OS)",
724      "Tibetan (Mac OS)",
725      "Mongolian (Mac OS)",
726      "Ethiopic (Mac OS)",
727      "Central European (Mac OS)",
728      "Vietnamese (Mac OS)",
729      "Symbol (Mac OS)",
730      "Dingbats (Mac OS)",
731      "Turkish (Mac OS)",
732      "Croatian (Mac OS)",
733      "Icelandic (Mac OS)",
734      "Romanian (Mac OS)",
735      "Celtic (Mac OS)",
736      "Gaelic (Mac OS)",
737      "Farsi (Mac OS)",
738      "Cyrillic (Mac OS Ukrainian)",
739      "Inuit (Mac OS)",
         // DOS code pages
740      "Latin-US (DOS)",
741      "Greek (DOS)",
742      "Baltic (DOS)",
743      "Western (DOS Latin 1)",
744      "Greek (DOS Greek 1)",
745      "Central European (DOS Latin 2)",
746      "Cyrillic (DOS)",
747      "Turkish (DOS)",
748      "Portuguese (DOS)",
749      "Icelandic (DOS)",
750      "Hebrew (DOS)",
751      "Canadian French (DOS)",
752      "Arabic (DOS)",
753      "Nordic (DOS)",
754      "Russian (DOS)",
755      "Greek (DOS Greek 2)",
756      "Thai (Windows, DOS)",
757      "Japanese (Windows, DOS)",
758      "Simplified Chinese (Windows, DOS)",
759      "Korean (Windows, DOS)",
760      "Traditional Chinese (Windows, DOS)",
         // Windows code pages, followed by ASCII
761      "Western (Windows Latin 1)",
762      "Central European (Windows Latin 2)",
763      "Cyrillic (Windows)",
764      "Greek (Windows)",
765      "Turkish (Windows Latin 5)",
766      "Hebrew (Windows)",
767      "Arabic (Windows)",
768      "Baltic (Windows)",
769      "Vietnamese (Windows)",
770      "Korean (Windows Johab)",
771      "Western (ASCII)",
         // Remaining encodings, in __CFKnownEncodingList order
772      "Japanese (Shift JIS X0213)",
773      "Chinese (GB 18030)",
774      "Japanese (ISO 2022-JP)",
775      "Japanese (ISO 2022-JP-2)",
776      "Japanese (ISO 2022-JP-1)",
777      "Japanese (ISO 2022-JP-3)",
778      "Chinese (ISO 2022-CN)",
779      "Chinese (ISO 2022-CN-EXT)",
780      "Korean (ISO 2022-KR)",
781      "Japanese (EUC)",
782      "Simplified Chinese (GB 2312)",
783      "Traditional Chinese (EUC)",
784      "Korean (EUC)",
785      "Japanese (Shift JIS)",
786      "Cyrillic (KOI8-R)",
787      "Traditional Chinese (Big 5)",
788      "Western (Mac Mail)",
789      "Simplified Chinese (HZ GB 2312)",
790      "Traditional Chinese (Big 5 HKSCS)",
791      NULL, // slot of kCFStringEncodingVISCII: no display name
792      "Ukrainian (KOI8-U)",
793      "Traditional Chinese (Big 5-E)",
794      NULL, // slot of kCFStringEncodingUTF7_IMAP: no display name
795      "Western (NextStep)",
796      "Western (EBCDIC Latin 1)",
797  };
798  #endif /* DEPLOYMENT_TARGET_MACOSX */
799  
800  CF_PRIVATE CFStringEncoding __CFStringEncodingGetMostCompatibleMacScript(CFStringEncoding encoding) {
801  #if DEPLOYMENT_TARGET_MACOSX
802      switch (encoding & 0x0F00) {
803          case 0: return encoding & 0xFF; break; // Mac scripts
804  
805          case 0x0100: return kCFStringEncodingUnicode; break; // Unicode
806  
807          case 0x200: // ISO 8859
808              return (((encoding & 0xFF) <= (sizeof(__CFISO8859SimilarScriptList) / sizeof(*__CFISO8859SimilarScriptList))) ? __CFISO8859SimilarScriptList[(encoding & 0xFF) - 1] : kCFStringEncodingInvalidId);
809              break;
810  
811          default: {
812              CFIndex index = __CFGetEncodingIndex(encoding);
813              
814              if (kCFNotFound != index) {
815                  index -= __CFGetEncodingIndex(kCFStringEncodingDOSLatinUS);
816                  return __CFOtherSimilarScriptList[index];
817              }
818          }
819      }
820  #endif /* DEPLOYMENT_TARGET_MACOSX */
821  
822      return kCFStringEncodingInvalidId;
823  }
824  
825  CF_PRIVATE const char *__CFStringEncodingGetName(CFStringEncoding encoding) {
826      switch (encoding) {
827          case kCFStringEncodingUTF8: return "Unicode (UTF-8)"; break;
828          case kCFStringEncodingUTF16: return "Unicode (UTF-16)"; break;
829          case kCFStringEncodingUTF16BE: return "Unicode (UTF-16BE)"; break;
830          case kCFStringEncodingUTF16LE: return "Unicode (UTF-16LE)"; break;
831          case kCFStringEncodingUTF32: return "Unicode (UTF-32)"; break;
832          case kCFStringEncodingUTF32BE: return "Unicode (UTF-32BE)"; break;
833          case kCFStringEncodingUTF32LE: return "Unicode (UTF-32LE)"; break;
834          case kCFStringEncodingNonLossyASCII: return "Non-lossy ASCII"; break;
835          case kCFStringEncodingUTF7: return "Unicode (UTF-7)"; break;
836      }
837  
838  #if DEPLOYMENT_TARGET_MACOSX
839      if (0x0200 == (encoding & 0x0F00)) {
840          encoding &= 0x00FF;
841  
842          if (encoding <= (sizeof(__CFISONameList) / sizeof(*__CFISONameList))) return __CFISONameList[encoding - 1];
843      } else {
844          CFIndex index = __CFGetEncodingIndex(encoding);
845  
846          if (kCFNotFound != index) return __CFOtherNameList[index];
847      }
848  #endif /* DEPLOYMENT_TARGET_MACOSX */
849      
850      return NULL;
851  }