/* CFPlatformConverters.c */
1 /* 2 * Copyright (c) 2015 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24 /* CFPlatformConverters.c 25 Copyright (c) 1998-2014, Apple Inc. All rights reserved. 26 Responsibility: Aki Inoue 27 */ 28 29 #include "CFInternal.h" 30 #include <CoreFoundation/CFString.h> 31 #include "CFStringEncodingConverterExt.h" 32 #include <CoreFoundation/CFStringEncodingExt.h> 33 #include "CFUniChar.h" 34 #include "CFUnicodeDecomposition.h" 35 #include "CFStringEncodingConverterPriv.h" 36 #include "CFICUConverters.h" 37 38 39 CF_INLINE bool __CFIsPlatformConverterAvailable(int encoding) { 40 41 #if DEPLOYMENT_TARGET_WINDOWS 42 return (IsValidCodePage(CFStringConvertEncodingToWindowsCodepage(encoding)) ? 
true : false); 43 #else 44 return false; 45 #endif 46 } 47 48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 49 50 static const CFStringEncodingConverter __CFICUBootstrap = { 51 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */, 52 kCFStringEncodingConverterICU /* encodingClass */, 53 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */, 54 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */ 55 }; 56 57 #endif 58 59 static const CFStringEncodingConverter __CFPlatformBootstrap = { 60 NULL /* toBytes */, NULL /* toUnicode */, 6 /* maxBytesPerChar */, 4 /* maxDecomposedCharLen */, 61 kCFStringEncodingConverterPlatformSpecific /* encodingClass */, 62 NULL /* toBytesLen */, NULL /* toUnicodeLen */, NULL /* toBytesFallback */, 63 NULL /* toUnicodeFallback */, NULL /* toBytesPrecompose */, NULL, /* isValidCombiningChar */ 64 }; 65 66 CF_PRIVATE const CFStringEncodingConverter *__CFStringEncodingGetExternalConverter(uint32_t encoding) { 67 68 // we prefer Text Encoding Converter ICU since it's more reliable 69 if (__CFIsPlatformConverterAvailable(encoding)) { 70 return &__CFPlatformBootstrap; 71 } else { 72 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 73 if (__CFStringEncodingGetICUName(encoding)) { 74 return &__CFICUBootstrap; 75 } 76 #endif 77 return NULL; 78 } 79 } 80 81 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED 82 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { 83 84 return NULL; 85 } 86 #elif DEPLOYMENT_TARGET_WINDOWS 87 88 #include <tchar.h> 89 90 static uint32_t __CFWin32EncodingIndex = 0; 91 static CFStringEncoding *__CFWin32EncodingList = NULL; 92 93 static char CALLBACK __CFWin32EnumCodePageProc(LPTSTR string) 
{ 94 uint32_t encoding = CFStringConvertWindowsCodepageToEncoding(_tcstoul(string, NULL, 10)); 95 CFIndex idx; 96 97 if (encoding != kCFStringEncodingInvalidId) { // We list only encodings we know 98 if (__CFWin32EncodingList) { 99 for (idx = 0;idx < (CFIndex)__CFWin32EncodingIndex;idx++) if (__CFWin32EncodingList[idx] == encoding) break; 100 if (idx != __CFWin32EncodingIndex) return true; 101 __CFWin32EncodingList[__CFWin32EncodingIndex] = encoding; 102 } 103 ++__CFWin32EncodingIndex; 104 } 105 return true; 106 } 107 108 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { 109 CFStringEncoding *encodings; 110 111 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED); 112 __CFWin32EncodingList = (uint32_t *)CFAllocatorAllocate(allocator, sizeof(uint32_t) * __CFWin32EncodingIndex, 0); 113 EnumSystemCodePages((CODEPAGE_ENUMPROC)&__CFWin32EnumCodePageProc, CP_INSTALLED); 114 115 *numberOfConverters = __CFWin32EncodingIndex; 116 encodings = __CFWin32EncodingList; 117 118 __CFWin32EncodingIndex = 0; 119 __CFWin32EncodingList = NULL; 120 121 return encodings; 122 } 123 #else 124 CF_PRIVATE CFStringEncoding *__CFStringEncodingCreateListOfAvailablePlatformConverters(CFAllocatorRef allocator, CFIndex *numberOfConverters) { return NULL; } 125 #endif 126 127 CF_PRIVATE CFIndex __CFStringEncodingPlatformUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) { 128 129 #if DEPLOYMENT_TARGET_WINDOWS 130 WORD dwFlags = 0; 131 CFIndex usedLen; 132 133 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-* 134 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 
WC_DEFAULTCHAR : 0); 135 dwFlags |= (flags & kCFStringEncodingComposeCombinings ? WC_COMPOSITECHECK : 0); 136 dwFlags |= (flags & kCFStringEncodingIgnoreCombinings ? WC_DISCARDNS : 0); 137 } 138 139 if ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL)) == 0) { 140 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { 141 CPINFO cpInfo; 142 143 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) { 144 cpInfo.MaxCharSize = 1; // Is this right ??? 145 } 146 if (cpInfo.MaxCharSize == 1) { 147 numChars = maxByteLen; 148 } else { 149 usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, NULL, 0, NULL, NULL); 150 usedLen -= maxByteLen; 151 numChars = (numChars > usedLen ? numChars - usedLen : 1); 152 } 153 if (WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, numChars, (LPSTR)bytes, maxByteLen, NULL, NULL) == 0) { 154 if (usedCharLen) *usedCharLen = 0; 155 if (usedByteLen) *usedByteLen = 0; 156 } else { 157 CFIndex lastUsedLen = 0; 158 159 while ((usedLen = WideCharToMultiByte(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCWSTR)characters, ++numChars, (LPSTR)bytes, maxByteLen, NULL, NULL))) lastUsedLen = usedLen; 160 if (usedCharLen) *usedCharLen = (numChars - 1); 161 if (usedByteLen) *usedByteLen = lastUsedLen; 162 } 163 164 return kCFStringEncodingInsufficientOutputBufferLength; 165 } else { 166 return kCFStringEncodingInvalidInputStream; 167 } 168 } else { 169 if (usedCharLen) *usedCharLen = numChars; 170 if (usedByteLen) *usedByteLen = usedLen; 171 return kCFStringEncodingConversionSuccess; 172 } 173 #endif /* DEPLOYMENT_TARGET_WINDOWS */ 174 175 return kCFStringEncodingConverterUnavailable; 176 } 177 178 CF_PRIVATE CFIndex __CFStringEncodingPlatformBytesToUnicode(uint32_t encoding, uint32_t flags, 
const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) { 179 180 #if DEPLOYMENT_TARGET_WINDOWS 181 WORD dwFlags = 0; 182 CFIndex usedLen; 183 184 if ((kCFStringEncodingUTF7 != encoding) && (kCFStringEncodingGB_18030_2000 != encoding) && (0x0800 != (encoding & 0x0F00))) { // not UTF-7/GB18030/ISO-2022-* 185 dwFlags |= (flags & (kCFStringEncodingAllowLossyConversion|kCFStringEncodingSubstituteCombinings) ? 0 : MB_ERR_INVALID_CHARS); 186 dwFlags |= (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? MB_COMPOSITE : MB_PRECOMPOSED); 187 } 188 189 if ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) { 190 if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { 191 CPINFO cpInfo; 192 193 if (!GetCPInfo(CFStringConvertEncodingToWindowsCodepage(encoding), &cpInfo)) { 194 cpInfo.MaxCharSize = 1; // Is this right ??? 195 } 196 if (cpInfo.MaxCharSize == 1) { 197 numBytes = maxCharLen; 198 } else { 199 usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen); 200 usedLen -= maxCharLen; 201 numBytes = (numBytes > usedLen ? 
numBytes - usedLen : 1); 202 } 203 while ((usedLen = MultiByteToWideChar(CFStringConvertEncodingToWindowsCodepage(encoding), dwFlags, (LPCSTR)bytes, numBytes, (LPWSTR)characters, maxCharLen)) == 0) { 204 if ((--numBytes) == 0) break; 205 } 206 if (usedCharLen) *usedCharLen = usedLen; 207 if (usedByteLen) *usedByteLen = numBytes; 208 209 return kCFStringEncodingInsufficientOutputBufferLength; 210 } else { 211 return kCFStringEncodingInvalidInputStream; 212 } 213 } else { 214 if (usedCharLen) *usedCharLen = usedLen; 215 if (usedByteLen) *usedByteLen = numBytes; 216 return kCFStringEncodingConversionSuccess; 217 } 218 #endif /* DEPLOYMENT_TARGET_WINDOWS */ 219 220 return kCFStringEncodingConverterUnavailable; 221 } 222 223 CF_PRIVATE CFIndex __CFStringEncodingPlatformCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) { 224 CFIndex usedCharLen; 225 return (__CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, NULL, NULL, 0, &usedCharLen) == kCFStringEncodingConversionSuccess ? usedCharLen : 0); 226 } 227 228 CF_PRIVATE CFIndex __CFStringEncodingPlatformByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) { 229 CFIndex usedByteLen; 230 return (__CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen) == kCFStringEncodingConversionSuccess ? usedByteLen : 0); 231 } 232 233 #undef __CFCarbonCore_GetTextEncodingBase0 234