/ CFPlatformConverters.c
CFPlatformConverters.c
  1  /*
  2   * Copyright (c) 2015 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. Please obtain a copy of the License at
 10   * http://www.opensource.apple.com/apsl/ and read it before using this
 11   * file.
 12   *
 13   * The Original Code and all software distributed under the License are
 14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 18   * Please see the License for the specific language governing rights and
 19   * limitations under the License.
 20   *
 21   * @APPLE_LICENSE_HEADER_END@
 22   */
 23  
 24  /*	CFPlatformConverters.c
 25  	Copyright (c) 1998-2014, Apple Inc. All rights reserved.
 26  	Responsibility: Aki Inoue
 27  */
 28  
 29  #include "CFInternal.h"
 30  #include <CoreFoundation/CFString.h>
 31  #include "CFStringEncodingConverterExt.h"
 32  #include <CoreFoundation/CFStringEncodingExt.h>
 33  #include "CFUniChar.h"
 34  #include "CFUnicodeDecomposition.h"
 35  #include "CFStringEncodingConverterPriv.h"
 36  #include "CFICUConverters.h"
 37  
 38  
 39  CF_INLINE bool __CFIsPlatformConverterAvailable(int encoding) {
 40  
 41  #if DEPLOYMENT_TARGET_WINDOWS
 42      return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding)) ? true : false);
 43  #else
 44      return false;
 45  #endif
 46  }
 47  
 48  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 49  
 50  static const CFStringEncodingConverter __CFICUBootstrap = {
 51      NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
 52      kCFStringEncodingConverterICU /* encodingClass */,
 53      NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
 54      NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
 55  };
 56  
 57  #endif
 58  
 59  static const CFStringEncodingConverter __CFPlatformBootstrap = {
 60      NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */,
 61      kCFStringEncodingConverterPlatformSpecific /* encodingClass */,
 62      NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */,
 63      NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */
 64  };
 65  
 66  CF_PRIVATE const CFStringEncodingConverter *__CFStringEncodingGetExternalConverter(uint32_t encoding) {
 67  
 68      // we prefer Text Encoding Converter ICU since it's more reliable
 69      if (__CFIsPlatformConverterAvailable(encoding)) {
 70          return &__CFPlatformBootstrap;
 71      } else {
 72  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
 73          if (__CFStringEncodingGetICUName(encoding)) {
 74              return &__CFICUBootstrap;
 75          }
 76  #endif
 77          return NULL;
 78      }
 79  }
 80  
 81  #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
 82  CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
 83  
 84      return NULL;
 85  }
 86  #elif DEPLOYMENT_TARGET_WINDOWS
 87          
 88  #include <tchar.h>
 89  
 90  static uint32_t __CFWin32EncodingIndex = 0;
 91  static CFStringEncoding *__CFWin32EncodingList = NULL;
 92  
 93  static char CALLBACK __CFWin32EnumCodePageProc(LPTSTR string) {
 94      uint32_t encoding = CFStringConvertWindowsCodepageToEncoding(_tcstoul(string, NULL, 10));
 95      CFIndex idx;
 96      
 97      if (encoding != kCFStringEncodingInvalidId) { // We list only encodings we know
 98          if (__CFWin32EncodingList) {
 99              for (idx = 0;idx < (CFIndex)__CFWin32EncodingIndex;idx++) if (__CFWin32EncodingList[idx] == encoding) break;
100              if (idx != __CFWin32EncodingIndex) return true;
101              __CFWin32EncodingList[__CFWin32EncodingIndex] = encoding;
102          }
103          ++__CFWin32EncodingIndex;
104      }
105      return true;
106  }
107  
108  CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) {
109      CFStringEncoding *encodings;
110  
111      EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
112      __CFWin32EncodingList = (uint32_t *)CFAllocatorAllocate(allocator, sizeof(uint32_t) * __CFWin32EncodingIndex, 0);
113      EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED);
114  
115      *numberOfConverters = __CFWin32EncodingIndex;
116      encodings = __CFWin32EncodingList;
117  
118      __CFWin32EncodingIndex = 0;
119      __CFWin32EncodingList = NULL;
120  
121      return encodings;
122  }
123  #else
124  CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { return NULL; }
125  #endif
126  
127  CF_PRIVATE CFIndex __CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
128  
129  #if DEPLOYMENT_TARGET_WINDOWS
130      WORD dwFlags = 0;
131      CFIndex usedLen;
132  
133      if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
134          dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? WC_DEFAULTCHAR : 0);
135          dwFlags |= (flags & kCFStringEncodingComposeCombinings ? WC_COMPOSITECHECK : 0);
136          dwFlags |= (flags & kCFStringEncodingIgnoreCombinings ? WC_DISCARDNS : 0);
137      }
138  
139      if ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL)) == 0) {
140          if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
141              CPINFO cpInfo;
142  
143              if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
144                  cpInfo.MaxCharSize = 1; // Is this right ???
145              }
146              if (cpInfo.MaxCharSize == 1) {
147                  numChars = maxByteLen;
148              } else {
149                  usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, NULL, 0, NULL, NULL);
150                  usedLen -= maxByteLen;
151                  numChars = (numChars > usedLen ? numChars - usedLen : 1);
152              }
153              if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL) == 0) {
154                  if (usedCharLen) *usedCharLen = 0;
155                  if (usedByteLen) *usedByteLen = 0;
156              } else {
157                  CFIndex lastUsedLen = 0;
158  
159                  while ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, ++numChars, (LPSTR)bytes, maxByteLen, NULL, NULL))) lastUsedLen = usedLen;
160                  if (usedCharLen) *usedCharLen = (numChars - 1);
161                  if (usedByteLen) *usedByteLen = lastUsedLen;
162              }
163  
164              return kCFStringEncodingInsufficientOutputBufferLength;
165          } else {
166              return kCFStringEncodingInvalidInputStream;
167          }
168      } else {
169          if (usedCharLen) *usedCharLen = numChars;
170          if (usedByteLen) *usedByteLen = usedLen;
171          return kCFStringEncodingConversionSuccess;
172      }
173  #endif /* DEPLOYMENT_TARGET_WINDOWS */
174  
175      return kCFStringEncodingConverterUnavailable;
176  }
177  
178  CF_PRIVATE CFIndex __CFStringEncodingPlatformBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
179  
180  #if DEPLOYMENT_TARGET_WINDOWS
181      WORD dwFlags = 0;
182      CFIndex usedLen;
183  
184      if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-*
185          dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 0 : MB_ERR_INVALID_CHARS);
186          dwFlags |= (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? MB_COMPOSITE : MB_PRECOMPOSED);
187      }
188  
189      if ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
190          if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
191              CPINFO cpInfo;
192  
193              if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) {
194                  cpInfo.MaxCharSize = 1; // Is this right ???
195              }
196              if (cpInfo.MaxCharSize == 1) {
197                  numBytes = maxCharLen;
198              } else {
199                  usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen);
200                  usedLen -= maxCharLen;
201                  numBytes = (numBytes > usedLen ? numBytes - usedLen : 1);
202              }
203              while ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) {
204                  if ((--numBytes) == 0) break;
205              }
206              if (usedCharLen) *usedCharLen = usedLen;
207              if (usedByteLen) *usedByteLen = numBytes;
208  
209              return kCFStringEncodingInsufficientOutputBufferLength;
210          } else {
211              return kCFStringEncodingInvalidInputStream;
212          }
213      } else {
214          if (usedCharLen) *usedCharLen = usedLen;
215          if (usedByteLen) *usedByteLen = numBytes;
216          return kCFStringEncodingConversionSuccess;
217      }
218  #endif /* DEPLOYMENT_TARGET_WINDOWS */
219  
220      return kCFStringEncodingConverterUnavailable;
221  }
222  
223  CF_PRIVATE CFIndex __CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
224      CFIndex usedCharLen;
225      return (__CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, NULL, NULL, 0, &usedCharLen) == kCFStringEncodingConversionSuccess ? usedCharLen : 0);
226  }
227  
228  CF_PRIVATE CFIndex __CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
229      CFIndex usedByteLen;
230      return (__CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen) == kCFStringEncodingConversionSuccess ? usedByteLen : 0);
231  }
232  
233  #undef __CFCarbonCore_GetTextEncodingBase0
234