/ CFLocaleIdentifier.c
CFLocaleIdentifier.c
1 /* 2 * Copyright (c) 2015 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24 /* 25 CFLocaleIdentifier.c 26 Copyright (c) 2002-2014, Apple Inc. All rights reserved. 27 Responsibility: David Smith 28 29 CFLocaleIdentifier.c defines 30 - enum value kLocaleIdentifierCStringMax 31 - structs KeyStringToResultString, SpecialCaseUpdates 32 and provides the following data for the functions 33 CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, 34 CFLocaleCreateCanonicalLocaleIdentifierFromString 35 CFLocaleCreateCanonicalLanguageIdentifierFromString 36 37 1. static const char * regionCodeToLocaleString[]; enum kNumRegionCodeToLocaleString; 38 map RegionCode 0..kNumRegionCodeToLocaleString-1 to canonical locale string 39 40 2. static const char * langCodeToLocaleString[]; enum kNumLangCodeToLocaleString; 41 map LangCode 0..kNumLangCodeToLocaleString-1 to canonical locale string 42 43 3. 
static const KeyStringToResultString oldAppleLocaleToCanonical[]; enum kNumOldAppleLocaleToCanonical; 44 map old Apple string oldAppleLocaleToCanonical[n].key 45 to canonical locale string oldAppleLocaleToCanonical[n].result 46 for n = 0..kNumOldAppleLocaleToCanonical-1 47 48 4. static const KeyStringToResultString localeStringPrefixToCanonical[]; enum kNumLocaleStringPrefixToCanonical; 49 map non-canonical language prefix (3-letter, obsolete) localeStringPrefixToCanonical[].key 50 to updated replacement localeStringPrefixToCanonical[].result 51 for n = 0..kNumLocaleStringPrefixToCanonical-1 52 53 5. static const SpecialCaseUpdates specialCases[]; 54 various special cases for updating region codes, or for updating language codes based on region codes 55 56 6. static const KeyStringToResultString localeStringRegionToDefaults[]; enum kNumLocaleStringRegionToDefaults; 57 map locale string region tag localeStringRegionToDefaults[n].key 58 to default substrings to delete localeStringRegionToDefaults[n].result 59 for n = 0..kNumLocaleStringRegionToDefaults-1 60 61 7. static const KeyStringToResultString localeStringPrefixToDefaults[]; enum kNumLocaleStringPrefixToDefaults; 62 map locale string initial part localeStringPrefixToDefaults[n].key 63 to default substrings to delete localeStringPrefixToDefaults[n].result 64 for n = 0..kNumLocaleStringPrefixToDefaults-1 65 66 8. 
static const KeyStringToResultString appleLocaleToLanguageString[]; enum kNumAppleLocaleToLanguageString; 67 map Apple locale string appleLocaleToLanguageString[].key 68 to equivalent language string appleLocaleToLanguageString[].result 69 for n = 0..kNumAppleLocaleToLanguageString-1 70 71 */ 72 73 #include <CoreFoundation/CFString.h> 74 #include <CoreFoundation/CFCalendar.h> 75 #include <ctype.h> 76 #include <string.h> 77 #include <stdlib.h> 78 #include <stdio.h> 79 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 80 #include <unicode/uloc.h> 81 #else 82 #define ULOC_KEYWORD_SEPARATOR '@' 83 #define ULOC_FULLNAME_CAPACITY 56 84 #define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 85 #endif 86 #include "CFInternal.h" 87 #include "CFLocaleInternal.h" 88 89 // Max byte length of locale identifier (ASCII) as C string, including terminating null byte 90 enum { 91 kLocaleIdentifierCStringMax = ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY // currently 56 + 100 92 }; 93 94 // KeyStringToResultString struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString 95 struct KeyStringToResultString { 96 const char * key; 97 const char * result; 98 }; 99 typedef struct KeyStringToResultString KeyStringToResultString; 100 101 // SpecialCaseUpdates struct used in data tables for CFLocaleCreateCanonicalLocaleIdentifierFromString 102 struct SpecialCaseUpdates { 103 const char * lang; 104 const char * reg1; 105 const char * update1; 106 const char * reg2; 107 const char * update2; 108 }; 109 typedef struct SpecialCaseUpdates SpecialCaseUpdates; 110 111 112 static const char * const regionCodeToLocaleString[] = { 113 // map RegionCode (array index) to canonical locale string 114 // 115 // canon. 
string region code; language code; [comment] [ # __CFBundleLocaleAbbreviationsArray 116 // -------- ------------ ------------------ ------------ -------- string, if different ] 117 "en_US", // 0 verUS; 0 langEnglish; 118 "fr_FR", // 1 verFrance; 1 langFrench; 119 "en_GB", // 2 verBritain; 0 langEnglish; 120 "de_DE", // 3 verGermany; 2 langGerman; 121 "it_IT", // 4 verItaly; 3 langItalian; 122 "nl_NL", // 5 verNetherlands; 4 langDutch; 123 "nl_BE", // 6 verFlemish; 34 langFlemish (redundant, =Dutch); 124 "sv_SE", // 7 verSweden; 5 langSwedish; 125 "es_ES", // 8 verSpain; 6 langSpanish; 126 "da_DK", // 9 verDenmark; 7 langDanish; 127 "pt_PT", // 10 verPortugal; 8 langPortuguese; 128 "fr_CA", // 11 verFrCanada; 1 langFrench; 129 "nb_NO", // 12 verNorway; 9 langNorwegian (Bokmal); # "no_NO" 130 "he_IL", // 13 verIsrael; 10 langHebrew; 131 "ja_JP", // 14 verJapan; 11 langJapanese; 132 "en_AU", // 15 verAustralia; 0 langEnglish; 133 "ar", // 16 verArabic; 12 langArabic; 134 "fi_FI", // 17 verFinland; 13 langFinnish; 135 "fr_CH", // 18 verFrSwiss; 1 langFrench; 136 "de_CH", // 19 verGrSwiss; 2 langGerman; 137 "el_GR", // 20 verGreece; 14 langGreek (modern)-Grek-mono; 138 "is_IS", // 21 verIceland; 15 langIcelandic; 139 "mt_MT", // 22 verMalta; 16 langMaltese; 140 "el_CY", // 23 verCyprus; 14 langGreek?; el or tr? 
guess el # "" 141 "tr_TR", // 24 verTurkey; 17 langTurkish; 142 "hr_HR", // 25 verYugoCroatian; 18 langCroatian; * one-way mapping -> verCroatia 143 "nl_NL", // 26 KCHR, Netherlands; 4 langDutch; * one-way mapping 144 "nl_BE", // 27 KCHR, verFlemish; 34 langFlemish; * one-way mapping 145 "_CA", // 28 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" 146 "_CA", // 29 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" 147 "pt_PT", // 30 KCHR, Portugal; 8 langPortuguese; * one-way mapping 148 "nb_NO", // 31 KCHR, Norway; 9 langNorwegian (Bokmal); * one-way mapping # "no_NO" 149 "da_DK", // 32 KCHR, Denmark; 7 langDanish; * one-way mapping 150 "hi_IN", // 33 verIndiaHindi; 21 langHindi; 151 "ur_PK", // 34 verPakistanUrdu; 20 langUrdu; 152 "tr_TR", // 35 verTurkishModified; 17 langTurkish; * one-way mapping 153 "it_CH", // 36 verItalianSwiss; 3 langItalian; 154 "en_001", // 37 verInternational; 0 langEnglish; ASCII only # "en" 155 NULL, // 38 *unassigned; -1 none; * one-way mapping # "" 156 "ro_RO", // 39 verRomania; 37 langRomanian; 157 "grc", // 40 verGreekAncient; 148 langGreekAncient -Grek-poly; # "el_GR" 158 "lt_LT", // 41 verLithuania; 24 langLithuanian; 159 "pl_PL", // 42 verPoland; 25 langPolish; 160 "hu_HU", // 43 verHungary; 26 langHungarian; 161 "et_EE", // 44 verEstonia; 27 langEstonian; 162 "lv_LV", // 45 verLatvia; 28 langLatvian; 163 "se", // 46 verSami; 29 langSami; 164 "fo_FO", // 47 verFaroeIsl; 30 langFaroese; 165 "fa_IR", // 48 verIran; 31 langFarsi/Persian; 166 "ru_RU", // 49 verRussia; 32 langRussian; 167 "ga_IE", // 50 verIreland; 35 langIrishGaelic (no dots); 168 "ko_KR", // 51 verKorea; 23 langKorean; 169 "zh_CN", // 52 verChina; 33 langSimpChinese; 170 "zh_TW", // 53 verTaiwan; 19 langTradChinese; 171 "th_TH", // 54 verThailand; 22 langThai; 172 "und", // 55 verScriptGeneric; -1 none; # "" // <1.9> 173 "cs_CZ", // 56 verCzech; 38 langCzech; 174 "sk_SK", // 57 verSlovak; 39 langSlovak; 175 "und", // 58 verEastAsiaGeneric; -1 none; 
* one-way mapping # "" // <1.9> 176 "hu_HU", // 59 verMagyar; 26 langHungarian; * one-way mapping -> verHungary 177 "bn", // 60 verBengali; 67 langBengali; _IN or _BD? guess generic 178 "be_BY", // 61 verBelarus; 46 langBelorussian; 179 "uk_UA", // 62 verUkraine; 45 langUkrainian; 180 NULL, // 63 *unused; -1 none; * one-way mapping # "" 181 "el_GR", // 64 verGreeceAlt; 14 langGreek (modern)-Grek-mono; * one-way mapping 182 "sr_RS", // 65 verSerbian; 42 langSerbian -Cyrl; // <1.18> 183 "sl_SI", // 66 verSlovenian; 40 langSlovenian; 184 "mk_MK", // 67 verMacedonian; 43 langMacedonian; 185 "hr_HR", // 68 verCroatia; 18 langCroatian; 186 NULL, // 69 *unused; -1 none; * one-way mapping # "" 187 "de-1996", // 70 verGermanReformed; 2 langGerman; 1996 orthogr. # "de_DE" 188 "pt_BR", // 71 verBrazil; 8 langPortuguese; 189 "bg_BG", // 72 verBulgaria; 44 langBulgarian; 190 "ca_ES", // 73 verCatalonia; 130 langCatalan; 191 "mul", // 74 verMultilingual; -1 none; # "" 192 "gd", // 75 verScottishGaelic; 144 langScottishGaelic; 193 "gv", // 76 verManxGaelic; 145 langManxGaelic; 194 "br", // 77 verBreton; 142 langBreton; 195 "iu_CA", // 78 verNunavut; 143 langInuktitut -Cans; 196 "cy", // 79 verWelsh; 128 langWelsh; 197 "_CA", // 80 KCHR, Canada-en/fr?; -1 none; * one-way mapping # "en_CA" 198 "ga-Latg_IE", // 81 verIrishGaelicScrip; 146 langIrishGaelicScript -dots; # "ga_IE" // <xx> 199 "en_CA", // 82 verEngCanada; 0 langEnglish; 200 "dz_BT", // 83 verBhutan; 137 langDzongkha; 201 "hy_AM", // 84 verArmenian; 51 langArmenian; 202 "ka_GE", // 85 verGeorgian; 52 langGeorgian; 203 "es_419", // 86 verSpLatinAmerica; 6 langSpanish; # "es" 204 "es_ES", // 87 KCHR, Spain; 6 langSpanish; * one-way mapping 205 "to_TO", // 88 verTonga; 147 langTongan; 206 "pl_PL", // 89 KCHR, Poland; 25 langPolish; * one-way mapping 207 "ca_ES", // 90 KCHR, Catalonia; 130 langCatalan; * one-way mapping 208 "fr_001", // 91 verFrenchUniversal; 1 langFrench; 209 "de_AT", // 92 verAustria; 2 langGerman; 210 
"es_419", // 93 > verSpLatinAmerica; 6 langSpanish; * one-way mapping # "es" 211 "gu_IN", // 94 verGujarati; 69 langGujarati; 212 "pa", // 95 verPunjabi; 70 langPunjabi; _IN or _PK? guess generic 213 "ur_IN", // 96 verIndiaUrdu; 20 langUrdu; 214 "vi_VN", // 97 verVietnam; 80 langVietnamese; 215 "fr_BE", // 98 verFrBelgium; 1 langFrench; 216 "uz_UZ", // 99 verUzbek; 47 langUzbek; 217 "en_SG", // 100 verSingapore; 0 langEnglish?; en, zh, or ms? guess en # "" 218 "nn_NO", // 101 verNynorsk; 151 langNynorsk; # "" 219 "af_ZA", // 102 verAfrikaans; 141 langAfrikaans; 220 "eo", // 103 verEsperanto; 94 langEsperanto; 221 "mr_IN", // 104 verMarathi; 66 langMarathi; 222 "bo", // 105 verTibetan; 63 langTibetan; 223 "ne_NP", // 106 verNepal; 64 langNepali; 224 "kl", // 107 verGreenland; 149 langGreenlandic; 225 "en_IE", // 108 verIrelandEnglish; 0 langEnglish; # (no entry) 226 }; 227 enum { 228 kNumRegionCodeToLocaleString = sizeof(regionCodeToLocaleString)/sizeof(char *) 229 }; 230 231 static const char * const langCodeToLocaleString[] = { 232 // map LangCode (array index) to canonical locale string 233 // 234 // canon. 
string language code; [ comment] [ # __CFBundleLanguageAbbreviationsArray 235 // -------- -------------- ---------- -------- string, if different ] 236 "en", // 0 langEnglish; 237 "fr", // 1 langFrench; 238 "de", // 2 langGerman; 239 "it", // 3 langItalian; 240 "nl", // 4 langDutch; 241 "sv", // 5 langSwedish; 242 "es", // 6 langSpanish; 243 "da", // 7 langDanish; 244 "pt", // 8 langPortuguese; 245 "nb", // 9 langNorwegian (Bokmal); # "no" 246 "he", // 10 langHebrew -Hebr; 247 "ja", // 11 langJapanese -Jpan; 248 "ar", // 12 langArabic -Arab; 249 "fi", // 13 langFinnish; 250 "el", // 14 langGreek (modern)-Grek-mono; 251 "is", // 15 langIcelandic; 252 "mt", // 16 langMaltese -Latn; 253 "tr", // 17 langTurkish -Latn; 254 "hr", // 18 langCroatian; 255 "zh-Hant", // 19 langTradChinese; # "zh" 256 "ur", // 20 langUrdu -Arab; 257 "hi", // 21 langHindi -Deva; 258 "th", // 22 langThai -Thai; 259 "ko", // 23 langKorean -Hang; 260 "lt", // 24 langLithuanian; 261 "pl", // 25 langPolish; 262 "hu", // 26 langHungarian; 263 "et", // 27 langEstonian; 264 "lv", // 28 langLatvian; 265 "se", // 29 langSami; 266 "fo", // 30 langFaroese; 267 "fa", // 31 langFarsi/Persian -Arab; 268 "ru", // 32 langRussian -Cyrl; 269 "zh-Hans", // 33 langSimpChinese; # "zh" 270 "nl-BE", // 34 langFlemish (redundant, =Dutch); # "nl" 271 "ga", // 35 langIrishGaelic (no dots); 272 "sq", // 36 langAlbanian; no region codes 273 "ro", // 37 langRomanian; 274 "cs", // 38 langCzech; 275 "sk", // 39 langSlovak; 276 "sl", // 40 langSlovenian; 277 "yi", // 41 langYiddish -Hebr; no region codes 278 "sr", // 42 langSerbian -Cyrl; 279 "mk", // 43 langMacedonian -Cyrl; 280 "bg", // 44 langBulgarian -Cyrl; 281 "uk", // 45 langUkrainian -Cyrl; 282 "be", // 46 langBelorussian -Cyrl; 283 "uz", // 47 langUzbek -Cyrl; also -Latn, -Arab 284 "kk", // 48 langKazakh -Cyrl; no region codes; also -Latn, -Arab 285 "az-Cyrl", // 49 langAzerbaijani -Cyrl; no region codes # "az" 286 "az-Arab", // 50 langAzerbaijanAr -Arab; no region 
codes # "az" 287 "hy", // 51 langArmenian -Armn; 288 "ka", // 52 langGeorgian -Geor; 289 "mo", // 53 langMoldavian -Cyrl; no region codes 290 "ky", // 54 langKirghiz -Cyrl; no region codes; also -Latn, -Arab 291 "tg", // 55 langTajiki -Cyrl; no region codes; also -Latn, -Arab 292 "tk-Cyrl", // 56 langTurkmen -Cyrl; no region codes; also -Latn, -Arab 293 "mn-Mong", // 57 langMongolian -Mong; no region codes # "mn" 294 "mn", // 58 langMongolianCyr -Cyrl; no region codes # "mn" 295 "ps", // 59 langPashto -Arab; no region codes 296 "ku", // 60 langKurdish -Arab; no region codes 297 "ks", // 61 langKashmiri -Arab; no region codes 298 "sd", // 62 langSindhi -Arab; no region codes 299 "bo", // 63 langTibetan -Tibt; 300 "ne", // 64 langNepali -Deva; 301 "sa", // 65 langSanskrit -Deva; no region codes 302 "mr", // 66 langMarathi -Deva; 303 "bn", // 67 langBengali -Beng; 304 "as", // 68 langAssamese -Beng; no region codes 305 "gu", // 69 langGujarati -Gujr; 306 "pa", // 70 langPunjabi -Guru; 307 "or", // 71 langOriya -Orya; no region codes 308 "ml", // 72 langMalayalam -Mlym; no region codes 309 "kn", // 73 langKannada -Knda; no region codes 310 "ta", // 74 langTamil -Taml; no region codes 311 "te", // 75 langTelugu -Telu; no region codes 312 "si", // 76 langSinhalese -Sinh; no region codes 313 "my", // 77 langBurmese -Mymr; no region codes 314 "km", // 78 langKhmer -Khmr; no region codes 315 "lo", // 79 langLao -Laoo; no region codes 316 "vi", // 80 langVietnamese -Latn; 317 "id", // 81 langIndonesian -Latn; no region codes 318 "fil", // 82 langTagalog -Latn; no region codes 319 "ms", // 83 langMalayRoman -Latn; no region codes # "ms" 320 "ms-Arab", // 84 langMalayArabic -Arab; no region codes # "ms" 321 "am", // 85 langAmharic -Ethi; no region codes 322 "ti", // 86 langTigrinya -Ethi; no region codes 323 "om", // 87 langOromo -Ethi; no region codes 324 "so", // 88 langSomali -Latn; no region codes 325 "sw", // 89 langSwahili -Latn; no region codes 326 "rw", // 90 
langKinyarwanda -Latn; no region codes 327 "rn", // 91 langRundi -Latn; no region codes 328 "ny", // 92 langNyanja/Chewa -Latn; no region codes # "" 329 "mg", // 93 langMalagasy -Latn; no region codes 330 "eo", // 94 langEsperanto -Latn; 331 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 95 to 105 (gap) 332 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 106 to 116 (gap) 333 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, // 107 to 117 (gap) 334 "cy", // 128 langWelsh -Latn; 335 "eu", // 129 langBasque -Latn; no region codes 336 "ca", // 130 langCatalan -Latn; 337 "la", // 131 langLatin -Latn; no region codes 338 "qu", // 132 langQuechua -Latn; no region codes 339 "gn", // 133 langGuarani -Latn; no region codes 340 "ay", // 134 langAymara -Latn; no region codes 341 "tt-Cyrl", // 135 langTatar -Cyrl; no region codes 342 "ug", // 136 langUighur -Arab; no region codes 343 "dz", // 137 langDzongkha -Tibt; 344 "jv", // 138 langJavaneseRom -Latn; no region codes 345 "su", // 139 langSundaneseRom -Latn; no region codes 346 "gl", // 140 langGalician -Latn; no region codes 347 "af", // 141 langAfrikaans -Latn; 348 "br", // 142 langBreton -Latn; 349 "iu", // 143 langInuktitut -Cans; 350 "gd", // 144 langScottishGaelic; 351 "gv", // 145 langManxGaelic -Latn; 352 "ga-Latg", // 146 langIrishGaelicScript -Latn-dots; # "ga" // <xx> 353 "to", // 147 langTongan -Latn; 354 "grc", // 148 langGreekAncient -Grek-poly; # "el" 355 "kl", // 149 langGreenlandic -Latn; 356 "az", // 150 langAzerbaijanRoman -Latn; no region codes # "az" 357 "nn", // 151 langNynorsk -Latn; # (no entry) 358 }; 359 enum { 360 kNumLangCodeToLocaleString = sizeof(langCodeToLocaleString)/sizeof(char *) 361 }; 362 363 static const KeyStringToResultString oldAppleLocaleToCanonical[] = { 364 // Map obsolete/old-style Apple strings to canonical 365 // Must be sorted according to how strcmp compares the strings in the first column 366 // 367 // non-canonical canonical [ comment ] # 
source/reason for non-canonical string 368 // string string 369 // ------------- --------- 370 { "Afrikaans", "af" }, // # __CFBundleLanguageNamesArray 371 { "Albanian", "sq" }, // # __CFBundleLanguageNamesArray 372 { "Amharic", "am" }, // # __CFBundleLanguageNamesArray 373 { "Arabic", "ar" }, // # __CFBundleLanguageNamesArray 374 { "Armenian", "hy" }, // # __CFBundleLanguageNamesArray 375 { "Assamese", "as" }, // # __CFBundleLanguageNamesArray 376 { "Aymara", "ay" }, // # __CFBundleLanguageNamesArray 377 { "Azerbaijani", "az" }, // -Arab,-Cyrl,-Latn? # __CFBundleLanguageNamesArray (had 3 entries "Azerbaijani" for "az-Arab", "az-Cyrl", "az-Latn") 378 { "Basque", "eu" }, // # __CFBundleLanguageNamesArray 379 { "Belarusian", "be" }, // # handle other names 380 { "Belorussian", "be" }, // # handle other names 381 { "Bengali", "bn" }, // # __CFBundleLanguageNamesArray 382 { "Brazilian Portugese", "pt-BR" }, // # from Installer.app Info.plist IFLanguages key, misspelled 383 { "Brazilian Portuguese", "pt-BR" }, // # correct spelling for above 384 { "Breton", "br" }, // # __CFBundleLanguageNamesArray 385 { "Bulgarian", "bg" }, // # __CFBundleLanguageNamesArray 386 { "Burmese", "my" }, // # __CFBundleLanguageNamesArray 387 { "Byelorussian", "be" }, // # __CFBundleLanguageNamesArray 388 { "Catalan", "ca" }, // # __CFBundleLanguageNamesArray 389 { "Chewa", "ny" }, // # handle other names 390 { "Chichewa", "ny" }, // # handle other names 391 { "Chinese", "zh" }, // -Hans,-Hant? 
# __CFBundleLanguageNamesArray (had 2 entries "Chinese" for "zh-Hant", "zh-Hans") 392 { "Chinese, Simplified", "zh-Hans" }, // # from Installer.app Info.plist IFLanguages key 393 { "Chinese, Traditional", "zh-Hant" }, // # correct spelling for below 394 { "Chinese, Tradtional", "zh-Hant" }, // # from Installer.app Info.plist IFLanguages key, misspelled 395 { "Croatian", "hr" }, // # __CFBundleLanguageNamesArray 396 { "Czech", "cs" }, // # __CFBundleLanguageNamesArray 397 { "Danish", "da" }, // # __CFBundleLanguageNamesArray 398 { "Dutch", "nl" }, // # __CFBundleLanguageNamesArray (had 2 entries "Dutch" for "nl", "nl-BE") 399 { "Dzongkha", "dz" }, // # __CFBundleLanguageNamesArray 400 { "English", "en" }, // # __CFBundleLanguageNamesArray 401 { "Esperanto", "eo" }, // # __CFBundleLanguageNamesArray 402 { "Estonian", "et" }, // # __CFBundleLanguageNamesArray 403 { "Faroese", "fo" }, // # __CFBundleLanguageNamesArray 404 { "Farsi", "fa" }, // # __CFBundleLanguageNamesArray 405 { "Finnish", "fi" }, // # __CFBundleLanguageNamesArray 406 { "Flemish", "nl-BE" }, // # handle other names 407 { "French", "fr" }, // # __CFBundleLanguageNamesArray 408 { "Galician", "gl" }, // # __CFBundleLanguageNamesArray 409 { "Gallegan", "gl" }, // # handle other names 410 { "Georgian", "ka" }, // # __CFBundleLanguageNamesArray 411 { "German", "de" }, // # __CFBundleLanguageNamesArray 412 { "Greek", "el" }, // # __CFBundleLanguageNamesArray (had 2 entries "Greek" for "el", "grc") 413 { "Greenlandic", "kl" }, // # __CFBundleLanguageNamesArray 414 { "Guarani", "gn" }, // # __CFBundleLanguageNamesArray 415 { "Gujarati", "gu" }, // # __CFBundleLanguageNamesArray 416 { "Hawaiian", "haw" }, // # handle new languages 417 { "Hebrew", "he" }, // # __CFBundleLanguageNamesArray 418 { "Hindi", "hi" }, // # __CFBundleLanguageNamesArray 419 { "Hungarian", "hu" }, // # __CFBundleLanguageNamesArray 420 { "Icelandic", "is" }, // # __CFBundleLanguageNamesArray 421 { "Indonesian", "id" }, // # 
__CFBundleLanguageNamesArray 422 { "Inuktitut", "iu" }, // # __CFBundleLanguageNamesArray 423 { "Irish", "ga" }, // # __CFBundleLanguageNamesArray (had 2 entries "Irish" for "ga", "ga-dots") 424 { "Italian", "it" }, // # __CFBundleLanguageNamesArray 425 { "Japanese", "ja" }, // # __CFBundleLanguageNamesArray 426 { "Javanese", "jv" }, // # __CFBundleLanguageNamesArray 427 { "Kalaallisut", "kl" }, // # handle other names 428 { "Kannada", "kn" }, // # __CFBundleLanguageNamesArray 429 { "Kashmiri", "ks" }, // # __CFBundleLanguageNamesArray 430 { "Kazakh", "kk" }, // # __CFBundleLanguageNamesArray 431 { "Khmer", "km" }, // # __CFBundleLanguageNamesArray 432 { "Kinyarwanda", "rw" }, // # __CFBundleLanguageNamesArray 433 { "Kirghiz", "ky" }, // # __CFBundleLanguageNamesArray 434 { "Korean", "ko" }, // # __CFBundleLanguageNamesArray 435 { "Kurdish", "ku" }, // # __CFBundleLanguageNamesArray 436 { "Lao", "lo" }, // # __CFBundleLanguageNamesArray 437 { "Latin", "la" }, // # __CFBundleLanguageNamesArray 438 { "Latvian", "lv" }, // # __CFBundleLanguageNamesArray 439 { "Lithuanian", "lt" }, // # __CFBundleLanguageNamesArray 440 { "Macedonian", "mk" }, // # __CFBundleLanguageNamesArray 441 { "Malagasy", "mg" }, // # __CFBundleLanguageNamesArray 442 { "Malay", "ms" }, // -Latn,-Arab? # __CFBundleLanguageNamesArray (had 2 entries "Malay" for "ms-Latn", "ms-Arab") 443 { "Malayalam", "ml" }, // # __CFBundleLanguageNamesArray 444 { "Maltese", "mt" }, // # __CFBundleLanguageNamesArray 445 { "Manx", "gv" }, // # __CFBundleLanguageNamesArray 446 { "Marathi", "mr" }, // # __CFBundleLanguageNamesArray 447 { "Moldavian", "mo" }, // # __CFBundleLanguageNamesArray 448 { "Mongolian", "mn" }, // -Mong,-Cyrl? 
# __CFBundleLanguageNamesArray (had 2 entries "Mongolian" for "mn-Mong", "mn-Cyrl") 449 { "Nepali", "ne" }, // # __CFBundleLanguageNamesArray 450 { "Norwegian", "nb" }, // # __CFBundleLanguageNamesArray (had "Norwegian" mapping to "no") 451 { "Nyanja", "ny" }, // # __CFBundleLanguageNamesArray 452 { "Nynorsk", "nn" }, // # handle other names (no entry in __CFBundleLanguageNamesArray) 453 { "Oriya", "or" }, // # __CFBundleLanguageNamesArray 454 { "Oromo", "om" }, // # __CFBundleLanguageNamesArray 455 { "Panjabi", "pa" }, // # handle other names 456 { "Pashto", "ps" }, // # __CFBundleLanguageNamesArray 457 { "Persian", "fa" }, // # handle other names 458 { "Polish", "pl" }, // # __CFBundleLanguageNamesArray 459 { "Portuguese", "pt" }, // # __CFBundleLanguageNamesArray 460 { "Portuguese, Brazilian", "pt-BR" }, // # handle other names 461 { "Punjabi", "pa" }, // # __CFBundleLanguageNamesArray 462 { "Pushto", "ps" }, // # handle other names 463 { "Quechua", "qu" }, // # __CFBundleLanguageNamesArray 464 { "Romanian", "ro" }, // # __CFBundleLanguageNamesArray 465 { "Ruanda", "rw" }, // # handle other names 466 { "Rundi", "rn" }, // # __CFBundleLanguageNamesArray 467 { "Russian", "ru" }, // # __CFBundleLanguageNamesArray 468 { "Sami", "se" }, // # __CFBundleLanguageNamesArray 469 { "Sanskrit", "sa" }, // # __CFBundleLanguageNamesArray 470 { "Scottish", "gd" }, // # __CFBundleLanguageNamesArray 471 { "Serbian", "sr" }, // # __CFBundleLanguageNamesArray 472 { "Simplified Chinese", "zh-Hans" }, // # handle other names 473 { "Sindhi", "sd" }, // # __CFBundleLanguageNamesArray 474 { "Sinhalese", "si" }, // # __CFBundleLanguageNamesArray 475 { "Slovak", "sk" }, // # __CFBundleLanguageNamesArray 476 { "Slovenian", "sl" }, // # __CFBundleLanguageNamesArray 477 { "Somali", "so" }, // # __CFBundleLanguageNamesArray 478 { "Spanish", "es" }, // # __CFBundleLanguageNamesArray 479 { "Sundanese", "su" }, // # __CFBundleLanguageNamesArray 480 { "Swahili", "sw" }, // # 
__CFBundleLanguageNamesArray 481 { "Swedish", "sv" }, // # __CFBundleLanguageNamesArray 482 { "Tagalog", "fil" }, // # __CFBundleLanguageNamesArray 483 { "Tajik", "tg" }, // # handle other names 484 { "Tajiki", "tg" }, // # __CFBundleLanguageNamesArray 485 { "Tamil", "ta" }, // # __CFBundleLanguageNamesArray 486 { "Tatar", "tt" }, // # __CFBundleLanguageNamesArray 487 { "Telugu", "te" }, // # __CFBundleLanguageNamesArray 488 { "Thai", "th" }, // # __CFBundleLanguageNamesArray 489 { "Tibetan", "bo" }, // # __CFBundleLanguageNamesArray 490 { "Tigrinya", "ti" }, // # __CFBundleLanguageNamesArray 491 { "Tongan", "to" }, // # __CFBundleLanguageNamesArray 492 { "Traditional Chinese", "zh-Hant" }, // # handle other names 493 { "Turkish", "tr" }, // # __CFBundleLanguageNamesArray 494 { "Turkmen", "tk" }, // # __CFBundleLanguageNamesArray 495 { "Uighur", "ug" }, // # __CFBundleLanguageNamesArray 496 { "Ukrainian", "uk" }, // # __CFBundleLanguageNamesArray 497 { "Urdu", "ur" }, // # __CFBundleLanguageNamesArray 498 { "Uzbek", "uz" }, // # __CFBundleLanguageNamesArray 499 { "Vietnamese", "vi" }, // # __CFBundleLanguageNamesArray 500 { "Welsh", "cy" }, // # __CFBundleLanguageNamesArray 501 { "Yiddish", "yi" }, // # __CFBundleLanguageNamesArray 502 { "ar_??", "ar" }, // # from old MapScriptInfoAndISOCodes 503 { "az.Ar", "az-Arab" }, // # from old LocaleRefGetPartString 504 { "az.Cy", "az-Cyrl" }, // # from old LocaleRefGetPartString 505 { "az.La", "az" }, // # from old LocaleRefGetPartString 506 { "be_??", "be_BY" }, // # from old MapScriptInfoAndISOCodes 507 { "bn_??", "bn" }, // # from old LocaleRefGetPartString 508 { "bo_??", "bo" }, // # from old MapScriptInfoAndISOCodes 509 { "br_??", "br" }, // # from old MapScriptInfoAndISOCodes 510 { "cy_??", "cy" }, // # from old MapScriptInfoAndISOCodes 511 { "de-96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> 512 { "de_96", "de-1996" }, // # from old MapScriptInfoAndISOCodes // <1.9> 513 { "de_??", "de-1996" }, // # 
from old MapScriptInfoAndISOCodes 514 { "el.El-P", "grc" }, // # from old LocaleRefGetPartString 515 { "en-ascii", "en_001" }, // # from earlier version of tables in this file! 516 { "en_??", "en_001" }, // # from old MapScriptInfoAndISOCodes 517 { "eo_??", "eo" }, // # from old MapScriptInfoAndISOCodes 518 { "es_??", "es_419" }, // # from old MapScriptInfoAndISOCodes 519 { "es_XL", "es_419" }, // # from earlier version of tables in this file! 520 { "fr_??", "fr_001" }, // # from old MapScriptInfoAndISOCodes 521 { "ga-dots", "ga-Latg" }, // # from earlier version of tables in this file! // <1.8> 522 { "ga-dots_IE", "ga-Latg_IE" }, // # from earlier version of tables in this file! // <1.8> 523 { "ga.Lg", "ga-Latg" }, // # from old LocaleRefGetPartString // <1.8> 524 { "ga.Lg_IE", "ga-Latg_IE" }, // # from old LocaleRefGetPartString // <1.8> 525 { "gd_??", "gd" }, // # from old MapScriptInfoAndISOCodes 526 { "gv_??", "gv" }, // # from old MapScriptInfoAndISOCodes 527 { "jv.La", "jv" }, // # logical extension // <1.9> 528 { "jw.La", "jv" }, // # from old LocaleRefGetPartString 529 { "kk.Cy", "kk" }, // # from old LocaleRefGetPartString 530 { "kl.La", "kl" }, // # from old LocaleRefGetPartString 531 { "kl.La_GL", "kl_GL" }, // # from old LocaleRefGetPartString // <1.9> 532 { "lp_??", "se" }, // # from old MapScriptInfoAndISOCodes 533 { "mk_??", "mk_MK" }, // # from old MapScriptInfoAndISOCodes 534 { "mn.Cy", "mn" }, // # from old LocaleRefGetPartString 535 { "mn.Mn", "mn-Mong" }, // # from old LocaleRefGetPartString 536 { "ms.Ar", "ms-Arab" }, // # from old LocaleRefGetPartString 537 { "ms.La", "ms" }, // # from old LocaleRefGetPartString 538 { "nl-be", "nl-BE" }, // # from old LocaleRefGetPartString 539 { "nl-be_BE", "nl_BE" }, // # from old LocaleRefGetPartString 540 { "no-NO", "nb-NO" }, // # not handled by localeStringPrefixToCanonical 541 { "no-NO_NO", "nb-NO_NO" }, // # not handled by localeStringPrefixToCanonical 542 // { "no-bok_NO", "nb_NO" }, // # from old 
LocaleRefGetPartString - handled by localeStringPrefixToCanonical 543 // { "no-nyn_NO", "nn_NO" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical 544 // { "nya", "ny" }, // # from old LocaleRefGetPartString - handled by localeStringPrefixToCanonical 545 { "pa_??", "pa" }, // # from old LocaleRefGetPartString 546 { "sa.Dv", "sa" }, // # from old LocaleRefGetPartString 547 { "sl_??", "sl_SI" }, // # from old MapScriptInfoAndISOCodes 548 { "sr_??", "sr_RS" }, // # from old MapScriptInfoAndISOCodes // <1.18> 549 { "su.La", "su" }, // # from old LocaleRefGetPartString 550 { "yi.He", "yi" }, // # from old LocaleRefGetPartString 551 { "zh-simp", "zh-Hans" }, // # from earlier version of tables in this file! 552 { "zh-trad", "zh-Hant" }, // # from earlier version of tables in this file! 553 { "zh.Ha-S", "zh-Hans" }, // # from old LocaleRefGetPartString 554 { "zh.Ha-S_CN", "zh_CN" }, // # from old LocaleRefGetPartString 555 { "zh.Ha-T", "zh-Hant" }, // # from old LocaleRefGetPartString 556 { "zh.Ha-T_TW", "zh_TW" }, // # from old LocaleRefGetPartString 557 }; 558 enum { 559 kNumOldAppleLocaleToCanonical = sizeof(oldAppleLocaleToCanonical)/sizeof(KeyStringToResultString) 560 }; 561 562 static const KeyStringToResultString localeStringPrefixToCanonical[] = { 563 // Map 3-letter & obsolete ISO 639 codes, plus obsolete RFC 3066 codes, to 2-letter ISO 639 code. 564 // (special cases for 'sh' handled separately) 565 // First column must be all lowercase; must be sorted according to how strcmp compares the strings in the first column. 566 // 567 // non-canonical canonical [ comment ] # source/reason for non-canonical string 568 // prefix prefix 569 // ------------- --------- 570 571 { "aar", "aa" }, // Afar 572 // { "aa_SAAHO", "ssy" }, // Saho # deprecated/grandfathered, handled as a special case 573 { "abk", "ab" }, // Abkhazian 574 { "afr", "af" }, // Afrikaans 575 { "aju", "jrb" }, // Moroccan Judeo-Arabic -> Judeo-Arabic (macrolang.) 
576 { "aka", "ak" }, // Akan 577 { "alb", "sq" }, // Albanian 578 { "als", "sq" }, // Tosk Albanian -> Albanian (macrolang.) 579 { "amh", "am" }, // Amharic 580 { "ara", "ar" }, // Arabic 581 { "arb", "ar" }, // Std Arabic -> Arabic (macrolang.) 582 { "arg", "an" }, // Aragonese 583 { "arm", "hy" }, // Armenian 584 { "art-lojban", "jbo" }, // Lojban # deprecated/grandfathered 585 { "asm", "as" }, // Assamese 586 { "ava", "av" }, // Avaric 587 { "ave", "ae" }, // Avestan 588 { "aym", "ay" }, // Aymara 589 { "ayr", "ay" }, // Central Aymara -> Aymara (macrolang.) 590 { "aze", "az" }, // Azerbaijani 591 { "azj", "az" }, // N.Azerbaijani -> Azerbaijani (macrolang.) 592 { "bak", "ba" }, // Bashkir 593 { "bam", "bm" }, // Bambara 594 { "baq", "eu" }, // Basque 595 { "bcc", "bal" }, // Balochi, Southern -> Baluchi (macrolang.) 596 { "bcl", "bik" }, // Bicolano, Central -> Bikol (macrolang.) 597 { "bel", "be" }, // Belarusian 598 { "ben", "bn" }, // Bengali 599 { "bih", "bh" }, // Bihari 600 { "bis", "bi" }, // Bislama 601 { "bod", "bo" }, // Tibetan 602 { "bos", "bs" }, // Bosnian 603 { "bre", "br" }, // Breton 604 { "bul", "bg" }, // Bulgarian 605 { "bur", "my" }, // Burmese 606 { "bxk", "luy" }, // Lubukusu -> Luyia (macrolang.) 607 { "bxr", "bua" }, // Buriat, Russia -> Buriat (macrolang.) 608 { "cat", "ca" }, // Catalan 609 { "ces", "cs" }, // Czech 610 { "cha", "ch" }, // Chamorro 611 { "che", "ce" }, // Chechen 612 { "chi", "zh" }, // Chinese 613 { "chu", "cu" }, // Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic 614 { "chv", "cv" }, // Chuvash 615 { "cld", "syr" }, // Chaldean Neo-Aramaic -> Syriac (macrolang.) 616 { "cmn", "zh" }, // Mandarin -> Chinese (macrolang.) 617 { "cor", "kw" }, // Cornish 618 { "cos", "co" }, // Corsican 619 { "cre", "cr" }, // Cree 620 { "cwd", "cr" }, // Cree, Woods -> Cree (macrolang.) 
621 { "cym", "cy" }, // Welsh 622 { "cze", "cs" }, // Czech 623 { "dan", "da" }, // Danish 624 { "deu", "de" }, // German 625 { "dgo", "doi" }, // Dogri -> Dogri (macrolang.) 626 { "dhd", "mwr" }, // Dhundari -> Marwari (macrolang.) 627 { "dik", "din" }, // Southwestern Dinka -> Dinka (macrolang.) 628 { "diq", "zza" }, // Dimli -> Zaza (macrolang.) 629 { "div", "dv" }, // Dhivehi, Divehi, Maldivian 630 { "dut", "nl" }, // Dutch 631 { "dzo", "dz" }, // Dzongkha 632 { "ekk", "et" }, // Std Estonian -> Estonian (macrolang.) 633 { "ell", "el" }, // Greek, Modern (1453-) 634 { "emk", "man" }, // Maninkakan, Eastern -> Mandingo (macrolang.) 635 { "eng", "en" }, // English 636 { "epo", "eo" }, // Esperanto 637 { "esk", "ik" }, // Northwest Alaska Inupiatun -> Inupiaq (macrolang.) 638 { "est", "et" }, // Estonian 639 { "eus", "eu" }, // Basque 640 { "ewe", "ee" }, // Ewe 641 { "fao", "fo" }, // Faroese 642 { "fas", "fa" }, // Persian 643 { "fat", "ak" }, // Fanti -> Akan (macrolang.) 644 { "fij", "fj" }, // Fijian 645 { "fin", "fi" }, // Finnish 646 { "fra", "fr" }, // French 647 { "fre", "fr" }, // French 648 { "fry", "fy" }, // Western Frisian 649 { "fuc", "ff" }, // Pular -> Fulah (macrolang.) 650 { "ful", "ff" }, // Fulah 651 { "gaz", "om" }, // W.Central Oromo -> Oromo (macrolang.) 652 { "gbo", "grb" }, // Northern Grebo -> Grebo (macrolang.) 653 { "geo", "ka" }, // Georgian 654 { "ger", "de" }, // German 655 { "gla", "gd" }, // Gaelic,Scottish 656 { "gle", "ga" }, // Irish 657 { "glg", "gl" }, // Gallegan 658 { "glv", "gv" }, // Manx 659 { "gno", "gon" }, // Northern Gondi -> Gondi (macrolang.) 660 { "gre", "el" }, // Greek, Modern (1453-) 661 { "grn", "gn" }, // Guarani 662 { "gug", "gn" }, // Paraguayan Guarani -> Guarani (macrolang.) 663 { "guj", "gu" }, // Gujarati 664 { "gya", "gba" }, // Northwest Gbaya -> Gbaya (Cent. Afr. Rep.) (macrolang.) 
665 { "hat", "ht" }, // Haitian, Haitian Creole 666 { "hau", "ha" }, // Hausa 667 { "hbs", "sr_Latn" }, // Serbo-Croatian 668 { "hdn", "hai" }, // Northern Haida -> Haida (macrolang.) 669 { "hea", "hmn" }, // Northern Qiandong Miao -> Hmong (macrolang.) 670 { "heb", "he" }, // Hebrew 671 { "her", "hz" }, // Herero 672 { "him", "srx" }, // Himachali -> Sirmauri (= Pahari, Himachali) (macrolang.) 673 { "hin", "hi" }, // Hindi 674 { "hmo", "ho" }, // Hiri Motu 675 { "hrv", "hr" }, // Croatian 676 { "hun", "hu" }, // Hungarian 677 { "hye", "hy" }, // Armenian 678 { "i-ami", "ami" }, // Amis # deprecated/grandfathered 679 { "i-bnn", "bnn" }, // Bunun # deprecated/grandfathered 680 { "i-hak", "hak" }, // Hakka # deprecated RFC 3066 681 { "i-klingon", "tlh" }, // Klingon # deprecated/grandfathered 682 { "i-lux", "lb" }, // Luxembourgish # deprecated RFC 3066 683 { "i-navajo", "nv" }, // Navajo # deprecated RFC 3066 684 { "i-pwn", "pwn" }, // Paiwan # deprecated/grandfathered 685 { "i-tao", "tao" }, // Tao # deprecated/grandfathered 686 { "i-tay", "tay" }, // Tayal # deprecated/grandfathered 687 { "i-tsu", "tsu" }, // Tsou # deprecated/grandfathered 688 { "ibo", "ig" }, // Igbo 689 { "ice", "is" }, // Icelandic 690 { "ido", "io" }, // Ido 691 { "iii", "ii" }, // Sichuan Yi, Nuosu 692 { "ike", "iu" }, // E.Canada Inuktitut -> Inuktitut (macrolang.) 
693 { "iku", "iu" }, // Inuktitut 694 { "ile", "ie" }, // Interlingue 695 { "in", "id" }, // Indonesian # deprecated 639 code in -> id (1989) 696 { "ina", "ia" }, // Interlingua 697 { "ind", "id" }, // Indonesian 698 { "ipk", "ik" }, // Inupiaq 699 { "isl", "is" }, // Icelandic 700 { "ita", "it" }, // Italian 701 { "iw", "he" }, // Hebrew # deprecated 639 code iw -> he (1989) 702 { "jav", "jv" }, // Javanese 703 { "jaw", "jv" }, // Javanese # deprecated 639 code jaw -> jv (2001) 704 { "ji", "yi" }, // Yiddish # deprecated 639 code ji -> yi (1989) 705 { "jpn", "ja" }, // Japanese 706 { "jw", "jv" }, // Javanese # deprecated 707 { "kal", "kl" }, // Kalaallisut 708 { "kan", "kn" }, // Kannada 709 { "kas", "ks" }, // Kashmiri 710 { "kat", "ka" }, // Georgian 711 { "kau", "kr" }, // Kanuri 712 { "kaz", "kk" }, // Kazakh 713 { "khk", "mn" }, // Halh Mongolian [mainly Cyrl] -> Mongolian (macrolang.) 714 { "khm", "km" }, // Khmer 715 { "kik", "ki" }, // Kikuyu, Gikuyu 716 { "kin", "rw" }, // Kinyarwanda 717 { "kir", "ky" }, // Kirghiz 718 { "kmr", "ku" }, // Northern Kurdish -> Kurdish (macrolang.) 719 { "knc", "kr" }, // Central Kanuri -> Kanuri (macrolang.) 720 { "kng", "kg" }, // Koongo -> Kongo (macrolang.) 721 { "knn", "kok" }, // Konkani (individ.lang) -> Konkani (macrolang.) 722 { "kom", "kv" }, // Komi 723 { "kon", "kg" }, // Kongo 724 { "kor", "ko" }, // Korean 725 { "kpv", "kv" }, // Komi-Zyrian -> Komi (macrolang.) 726 { "kua", "kj" }, // Kuanyama, Kwanyama 727 { "kur", "ku" }, // Kurdish 728 { "lao", "lo" }, // Lao 729 { "lat", "la" }, // Latin 730 { "lav", "lv" }, // Latvian 731 { "lbk", "bnc" }, // Central Bontok -> Bontok (macrolang.) 732 { "lim", "li" }, // Limburgan, Limburger, Limburgish 733 { "lin", "ln" }, // Lingala 734 { "lit", "lt" }, // Lithuanian 735 { "ltz", "lb" }, // Letzeburgesch 736 { "lub", "lu" }, // Luba-Katanga 737 { "lug", "lg" }, // Ganda 738 { "lvs", "lv" }, // Std Latvian -> Latvian (macrolang.) 
739 { "mac", "mk" }, // Macedonian 740 { "mal", "ml" }, // Malayalam 741 { "mar", "mr" }, // Marathi 742 { "may", "ms" }, // Malay 743 { "mhr", "chm" }, // Mari, Eastern -> Mari (Russia) (macrolang.) 744 { "mkd", "mk" }, // Macedonian 745 { "mlg", "mg" }, // Malagasy 746 { "mlt", "mt" }, // Maltese 747 { "mol", "mo" }, // Moldavian 748 { "mon", "mn" }, // Mongolian 749 { "msa", "ms" }, // Malay 750 { "mup", "raj" }, // Malvi -> Rajasthani (macrolang.) 751 { "mya", "my" }, // Burmese 752 { "nau", "na" }, // Nauru 753 { "nav", "nv" }, // Navajo, Navaho 754 { "nbl", "nr" }, // South Ndebele 755 { "nde", "nd" }, // North Ndebele 756 { "ndo", "ng" }, // Ndonga 757 { "nep", "ne" }, // Nepali 758 { "nld", "nl" }, // Dutch 759 { "nno", "nn" }, // Norwegian Nynorsk 760 { "no", "nb" }, // Norwegian generic # ambiguous 639 code no -> nb 761 { "no-bok", "nb" }, // Norwegian Bokmal # deprecated RFC 3066 tag - used in old LocaleRefGetPartString 762 { "no-nyn", "nn" }, // Norwegian Nynorsk # deprecated RFC 3066 tag - used in old LocaleRefGetPartString 763 { "nob", "nb" }, // Norwegian Bokmal 764 { "nor", "nb" }, // Norwegian generic # ambiguous 639 code nor -> nb 765 // { "no_BOKMAL", "nb" }, // Norwegian Bokmal # deprecated/grandfathered, handled as a special case 766 // { "no_NYNORSK", "nn" }, // Norwegian Nynorsk # deprecated/grandfathered, handled as a special case 767 { "nya", "ny" }, // Nyanja/Chewa/Chichewa # 3-letter code used in old LocaleRefGetPartString 768 { "oci", "oc" }, // Occitan/Provencal 769 { "ojg", "oj" }, // Ojibwa, Eastern -> Ojibwa (macrolang.) 770 { "oji", "oj" }, // Ojibwa 771 { "ori", "or" }, // Oriya 772 { "orm", "om" }, // Oromo,Galla 773 { "oss", "os" }, // Ossetian, Ossetic 774 { "pan", "pa" }, // Panjabi 775 { "pbu", "ps" }, // N.Pashto, -> Pushto (macrolang.) 776 { "per", "fa" }, // Persian 777 { "pes", "fa" }, // W.Farsi -> Persian (macrolang.) 778 { "pli", "pi" }, // Pali 779 { "plt", "mg" }, // Plateau Malagasy -> Malagasy (macrolang.) 
780 { "pnb", "lah" }, // W.Panjabi -> Lahnda (macrolang.) 781 { "pol", "pl" }, // Polish 782 { "por", "pt" }, // Portuguese 783 { "pus", "ps" }, // Pushto 784 { "que", "qu" }, // Quechua 785 { "qxp", "qu" }, // Puno Quechua -> Quechua (macrolang.) 786 { "rmy", "rom" }, // Vlax Romani -> Romany (macrolang.) 787 { "roh", "rm" }, // Raeto-Romance 788 { "ron", "ro" }, // Romanian 789 { "rum", "ro" }, // Romanian 790 { "run", "rn" }, // Rundi 791 { "rus", "ru" }, // Russian 792 { "sag", "sg" }, // Sango 793 { "san", "sa" }, // Sanskrit 794 { "scc", "sr" }, // Serbian 795 { "scr", "hr" }, // Croatian 796 { "sgn-be-fr", "sfb" }, // Belgian-French Sign Lang. # deprecated/grandfathered 797 { "sgn-be-nl", "vgt" }, // Belgian-Flemish Sign Lang. # deprecated/grandfathered 798 { "sgn-ch-de", "sgg" }, // Swiss German Sign Lang. # deprecated/grandfathered 799 { "sin", "si" }, // Sinhalese 800 { "slk", "sk" }, // Slovak 801 { "slo", "sk" }, // Slovak 802 { "slv", "sl" }, // Slovenian 803 { "sme", "se" }, // Sami,Northern 804 { "smo", "sm" }, // Samoan 805 { "sna", "sn" }, // Shona 806 { "snd", "sd" }, // Sindhi 807 { "som", "so" }, // Somali 808 { "sot", "st" }, // Southern Sotho 809 { "spa", "es" }, // Spanish 810 { "spy", "kln" }, // Sabaot -> Kalenjin (macrolang.) 811 { "sqi", "sq" }, // Albanian 812 { "src", "sc" }, // Sardinian, Logudorese -> Sardinian (macrolang.) 813 { "srd", "sc" }, // Sardinian 814 { "srp", "sr" }, // Serbian 815 { "ssw", "ss" }, // Swati 816 { "sun", "su" }, // Sundanese 817 { "swa", "sw" }, // Swahili 818 { "swe", "sv" }, // Swedish 819 { "swh", "sw" }, // Swahili (individ.lang) -> Swahili (macrolang.) 
820 { "tah", "ty" }, // Tahitian 821 { "tam", "ta" }, // Tamil 822 { "tat", "tt" }, // Tatar 823 { "tel", "te" }, // Telugu 824 { "tgk", "tg" }, // Tajik 825 { "tgl", "fil" }, // Tagalog 826 { "tha", "th" }, // Thai 827 { "tib", "bo" }, // Tibetan 828 { "tir", "ti" }, // Tigrinya 829 { "tl", "fil" }, // Tagalog # legacy 830 { "ton", "to" }, // Tongan 831 { "tsn", "tn" }, // Tswana 832 { "tso", "ts" }, // Tsonga 833 { "ttq", "tmh" }, // Tamajaq, Tawallammat -> Tamashek (macrolang.) 834 { "tuk", "tk" }, // Turkmen 835 { "tur", "tr" }, // Turkish 836 { "tw", "ak" }, // Twi -> Akan (macrolang.) 837 { "twi", "ak" }, // Twi 838 { "uig", "ug" }, // Uighur 839 { "ukr", "uk" }, // Ukrainian 840 { "umu", "del" }, // Munsee -> Delaware (macrolang.) 841 { "urd", "ur" }, // Urdu 842 { "uzb", "uz" }, // Uzbek 843 { "uzn", "uz" }, // N. Uzbek -> Uzbek (macrolang.) 844 { "ven", "ve" }, // Venda 845 { "vie", "vi" }, // Vietnamese 846 { "vol", "vo" }, // Volapük 847 { "wel", "cy" }, // Welsh 848 { "wln", "wa" }, // Walloon 849 { "wol", "wo" }, // Wolof 850 { "xho", "xh" }, // Xhosa 851 { "xpe", "kpe" }, // Kpelle, Liberia -> Kpelle (macrolang.) 852 { "xsl", "den" }, // Slavey, South -> Slave (Athapascan) (macrolang.) 853 { "ydd", "yi" }, // Yiddish,E. -> Yiddish (macrolang.) 854 { "yid", "yi" }, // Yiddish 855 { "yor", "yo" }, // Yoruba 856 { "zai", "zap" }, // Zapotec, Isthmus -> Zapotec (macrolang.) 
857 { "zh-cdo", "cdo" }, // Chinese, Min Dong # extlang 858 { "zh-cjy", "cjy" }, // Chinese, Jinyu # extlang 859 { "zh-cmn", "zh" }, // Chinese, Mandarin # extlang 860 { "zh-cpx", "cpx" }, // Chinese, Pu-Xian # extlang 861 { "zh-czh", "czh" }, // Chinese, Huizhou # extlang 862 { "zh-czo", "czo" }, // Chinese, Min Zhong # extlang 863 { "zh-gan", "gan" }, // Chinese, Gan # extlang 864 { "zh-guoyu", "zh" }, // Mandarin/Std Chinese # deprecated 865 { "zh-hak", "hak" }, // Chinese, Hakka # extlang 866 { "zh-hakka", "hak" }, // Hakka # deprecated 867 { "zh-hsn", "hsn" }, // Chinese, Xiang # extlang 868 { "zh-min-nan", "nan" }, // Minnan,Hokkien,Taiwanese,So. Fujian # deprecated 869 { "zh-mnp", "mnp" }, // Chinese, Min Bei # extlang 870 { "zh-nan", "nan" }, // Chinese, Min Nan # extlang 871 { "zh-wuu", "wuu" }, // Chinese, Wu # extlang 872 { "zh-xiang", "hsn" }, // Xiang/Hunanese # deprecated 873 { "zh-yue", "yue" }, // Chinese, Yue # extlang 874 { "zha", "za" }, // Zhuang, Chuang 875 { "zho", "zh" }, // Chinese 876 { "zsm", "ms" }, // Std Malay -> Malay (macrolang.) 877 { "zul", "zu" }, // Zulu 878 { "zyb", "za" }, // Yongbei Zhuang -> Zhuang (macrolang.) 879 }; 880 enum { 881 kNumLocaleStringPrefixToCanonical = sizeof(localeStringPrefixToCanonical)/sizeof(KeyStringToResultString) 882 }; 883 884 885 static const SpecialCaseUpdates specialCases[] = { 886 // Data for special cases 887 // a) The 3166 code CS was used for Czechoslovakia until 1993, when that country split and the code was 888 // replaced by CZ and SK. Then in 2003-07, the code YU (formerly designating all of Yugoslavia, then after 889 // the 1990s breakup just designating what is now Serbia and Montenegro) was changed to CS! Then after 890 // Serbia and Montenegro split, the code CS was replaced in 2006-09 with separate codes RS and ME. If we 891 // see CS but a language of cs or sk, we change CS to CZ or SK. Otherwise, we change CS (and old YU) to RS. 
892 // b) The 639 code sh for Serbo-Croatian was also replaced in the 1990s by separate codes hr and sr, and 893 // deprecated in 2000. We guess which one to map it to as follows: If there is a region tag of HR we use 894 // hr; if there is a region tag of (now) RS we use sr; else we do not change it (not enough info). 895 // c) There are other codes that have been updated without these issues (eg. TP to TL), plus among the 896 // "exceptionally reserved" codes some are just alternates for standard codes (eg. UK for GB). 897 { NULL, "-UK", "GB", NULL, NULL }, // always change UK to GB (UK is "exceptionally reserved" to mean GB) 898 { NULL, "-TP", "TL", NULL, NULL }, // always change TP to TL (East Timor, code changed 2002-05) 899 { "cs", "-CS", "CZ", NULL, NULL }, // if language is cs, change CS (pre-1993 Czechoslovakia) to CZ (Czech Republic) 900 { "sk", "-CS", "SK", NULL, NULL }, // if language is sk, change CS (pre-1993 Czechoslovakia) to SK (Slovakia) 901 { NULL, "-CS", "RS", NULL, NULL }, // otherwise map CS (assume Serbia+Montenegro) to RS (Serbia) 902 { NULL, "-YU", "RS", NULL, NULL }, // also map old YU (assume Serbia+Montenegro) to RS (Serbia) 903 { "sh", "-HR", "hr", "-RS", "sr" }, // then if language is old 'sh' (SerboCroatian), change it to 'hr' (Croatian) 904 // if we find HR (Croatia) or to 'sr' (Serbian) if we find RS (Serbia). 905 // Note: Do this after changing YU/CS to RS as above. 906 { NULL, NULL, NULL, NULL, NULL } // terminator 907 }; 908 909 910 static const KeyStringToResultString localeStringRegionToDefaults[] = { 911 // For some region-code suffixes, there are default substrings to strip off for canonical string. 
912 // Must be sorted according to how strcmp compares the strings in the first column 913 // 914 // region default writing 915 // suffix system tags, strip comment 916 // -------- ------------- --------- 917 { "_CN", "-Hans" }, // mainland China, default is simplified 918 { "_HK", "-Hant" }, // Hong Kong, default is traditional 919 { "_MO", "-Hant" }, // Macao, default is traditional 920 { "_SG", "-Hans" }, // Singapore, default is simplified 921 { "_TW", "-Hant" }, // Taiwan, default is traditional 922 }; 923 enum { 924 kNumLocaleStringRegionToDefaults = sizeof(localeStringRegionToDefaults)/sizeof(KeyStringToResultString) 925 }; 926 927 static const KeyStringToResultString localeStringPrefixToDefaults[] = { 928 // For some initial portions of language tag, there are default substrings to strip off for canonical string. 929 // Must be sorted according to how strcmp compares the strings in the first column 930 // 931 // language default writing 932 // tag prefix system tags, strip comment 933 // -------- ------------- --------- 934 { "ab-", "-Cyrl" }, // Abkhazian 935 { "af-", "-Latn" }, // Afrikaans 936 { "agq-", "-Latn" }, // Aghem 937 { "ak-", "-Latn" }, // Akan 938 { "am-", "-Ethi" }, // Amharic 939 { "ar-", "-Arab" }, // Arabic 940 { "as-", "-Beng" }, // Assamese 941 { "asa-", "-Latn" }, // Asu 942 { "ay-", "-Latn" }, // Aymara 943 { "az-", "-Latn" }, // Azerbaijani 944 { "bas-", "-Latn" }, // Basaa 945 { "be-", "-Cyrl" }, // Belarusian 946 { "bem-", "-Latn" }, // Bemba 947 { "bez-", "-Latn" }, // Bena 948 { "bg-", "-Cyrl" }, // Bulgarian 949 { "bm-", "-Latn" }, // Bambara 950 { "bn-", "-Beng" }, // Bengali 951 { "bo-", "-Tibt" }, // Tibetan (? not Suppress-Script) 952 { "br-", "-Latn" }, // Breton (? 
not Suppress-Script) 953 { "brx-", "-Deva" }, // Bodo 954 { "bs-", "-Latn" }, // Bosnian 955 { "ca-", "-Latn" }, // Catalan 956 { "cgg-", "-Latn" }, // Chiga 957 { "chr-", "-Cher" }, // Cherokee 958 { "cs-", "-Latn" }, // Czech 959 { "cy-", "-Latn" }, // Welsh 960 { "da-", "-Latn" }, // Danish 961 { "dav-", "-Latn" }, // Taita 962 { "de-", "-Latn -1901" }, // German, traditional orthography 963 { "dje-", "-Latn" }, // Zarma 964 { "dua-", "-Latn" }, // Duala 965 { "dv-", "-Thaa" }, // Divehi/Maldivian 966 { "dyo-", "-Latn" }, // Jola-Fonyi 967 { "dz-", "-Tibt" }, // Dzongkha 968 { "ebu-", "-Latn" }, // Embu 969 { "ee-", "-Latn" }, // Ewe 970 { "el-", "-Grek" }, // Greek (modern, monotonic) 971 { "en-", "-Latn" }, // English 972 { "eo-", "-Latn" }, // Esperanto 973 { "es-", "-Latn" }, // Spanish 974 { "et-", "-Latn" }, // Estonian 975 { "eu-", "-Latn" }, // Basque 976 { "ewo-", "-Latn" }, // Ewondo 977 { "fa-", "-Arab" }, // Farsi 978 { "ff-", "-Latn" }, // Fulah 979 { "fi-", "-Latn" }, // Finnish 980 { "fil-", "-Latn" }, // Tagalog 981 { "fo-", "-Latn" }, // Faroese 982 { "fr-", "-Latn" }, // French 983 { "ga-", "-Latn" }, // Irish 984 { "gd-", "-Latn" }, // Scottish Gaelic (? not Suppress-Script) 985 { "gl-", "-Latn" }, // Galician 986 { "gn-", "-Latn" }, // Guarani 987 { "gsw-", "-Latn" }, // Swiss German 988 { "gu-", "-Gujr" }, // Gujarati 989 { "guz-", "-Latn" }, // Gusii 990 { "gv-", "-Latn" }, // Manx 991 { "ha-", "-Latn" }, // Hausa 992 { "haw-", "-Latn" }, // Hawaiian (? 
not Suppress-Script) 993 { "he-", "-Hebr" }, // Hebrew 994 { "hi-", "-Deva" }, // Hindi 995 { "hr-", "-Latn" }, // Croatian 996 { "hu-", "-Latn" }, // Hungarian 997 { "hy-", "-Armn" }, // Armenian 998 { "id-", "-Latn" }, // Indonesian 999 { "ig-", "-Latn" }, // Igbo 1000 { "ii-", "-Yiii" }, // Sichuan Yi 1001 { "is-", "-Latn" }, // Icelandic 1002 { "it-", "-Latn" }, // Italian 1003 { "ja-", "-Jpan" }, // Japanese 1004 { "jmc-", "-Latn" }, // Machame 1005 { "ka-", "-Geor" }, // Georgian 1006 { "kab-", "-Latn" }, // Kabyle 1007 { "kam-", "-Latn" }, // Kamba 1008 { "kde-", "-Latn" }, // Makonde 1009 { "kea-", "-Latn" }, // Kabuverdianu 1010 { "khq-", "-Latn" }, // Koyra Chiini 1011 { "ki-", "-Latn" }, // Kikuyu 1012 { "kk-", "-Cyrl" }, // Kazakh 1013 { "kl-", "-Latn" }, // Kalaallisut/Greenlandic 1014 { "km-", "-Khmr" }, // Central Khmer 1015 { "kn-", "-Knda" }, // Kannada 1016 { "ko-", "-Hang" }, // Korean (? not Suppress-Script) 1017 { "kok-", "-Deva" }, // Konkani 1018 { "ksb-", "-Latn" }, // Shambala 1019 { "ksf-", "-Latn" }, // Bafia 1020 { "kw-", "-Latn" }, // Cornish 1021 { "ky-", "-Cyrl" }, // Kirghiz 1022 { "la-", "-Latn" }, // Latin 1023 { "lag-", "-Latn" }, // Langi 1024 { "lb-", "-Latn" }, // Luxembourgish 1025 { "lg-", "-Latn" }, // Ganda 1026 { "ln-", "-Latn" }, // Lingala 1027 { "lo-", "-Laoo" }, // Lao 1028 { "lt-", "-Latn" }, // Lithuanian 1029 { "lu-", "-Latn" }, // Luba-Katanga 1030 { "luo-", "-Latn" }, // Luo 1031 { "luy-", "-Latn" }, // Luyia 1032 { "lv-", "-Latn" }, // Latvian 1033 { "mas-", "-Latn" }, // Masai 1034 { "mer-", "-Latn" }, // Meru 1035 { "mfe-", "-Latn" }, // Morisyen 1036 { "mg-", "-Latn" }, // Malagasy 1037 { "mgh-", "-Latn" }, // Makhuwa-Meetto 1038 { "mk-", "-Cyrl" }, // Macedonian 1039 { "ml-", "-Mlym" }, // Malayalam 1040 { "mn-", "-Cyrl" }, // Mongolian 1041 { "mo-", "-Latn" }, // Moldavian 1042 { "mr-", "-Deva" }, // Marathi 1043 { "ms-", "-Latn" }, // Malay 1044 { "mt-", "-Latn" }, // Maltese 1045 { "mua-", "-Latn" }, // 
Mundang 1046 { "my-", "-Mymr" }, // Burmese/Myanmar 1047 { "naq-", "-Latn" }, // Nama 1048 { "nb-", "-Latn" }, // Norwegian Bokmal 1049 { "nd-", "-Latn" }, // North Ndebele 1050 { "ne-", "-Deva" }, // Nepali 1051 { "nl-", "-Latn" }, // Dutch 1052 { "nmg-", "-Latn" }, // Kwasio 1053 { "nn-", "-Latn" }, // Norwegian Nynorsk 1054 { "nus-", "-Latn" }, // Nuer 1055 { "ny-", "-Latn" }, // Chichewa/Nyanja 1056 { "nyn-", "-Latn" }, // Nyankole 1057 { "om-", "-Latn" }, // Oromo 1058 { "or-", "-Orya" }, // Oriya 1059 { "pa-", "-Guru" }, // Punjabi 1060 { "pl-", "-Latn" }, // Polish 1061 { "ps-", "-Arab" }, // Pushto 1062 { "pt-", "-Latn" }, // Portuguese 1063 { "qu-", "-Latn" }, // Quechua 1064 { "rm-", "-Latn" }, // Romansh 1065 { "rn-", "-Latn" }, // Rundi 1066 { "ro-", "-Latn" }, // Romanian 1067 { "rof-", "-Latn" }, // Rombo 1068 { "ru-", "-Cyrl" }, // Russian 1069 { "rw-", "-Latn" }, // Kinyarwanda 1070 { "rwk-", "-Latn" }, // Rwa 1071 { "sa-", "-Deva" }, // Sanskrit (? not Suppress-Script) 1072 { "saq-", "-Latn" }, // Samburu 1073 { "sbp-", "-Latn" }, // Sangu 1074 { "se-", "-Latn" }, // Sami (? 
not Suppress-Script) 1075 { "seh-", "-Latn" }, // Sena 1076 { "ses-", "-Latn" }, // Koyraboro Senni 1077 { "sg-", "-Latn" }, // Sango 1078 { "shi-", "-Latn" }, // Tachelhit 1079 { "si-", "-Sinh" }, // Sinhala 1080 { "sk-", "-Latn" }, // Slovak 1081 { "sl-", "-Latn" }, // Slovenian 1082 { "sn-", "-Latn" }, // Shona 1083 { "so-", "-Latn" }, // Somali 1084 { "sq-", "-Latn" }, // Albanian 1085 { "sr-", "-Cyrl" }, // Serbian 1086 { "sv-", "-Latn" }, // Swedish 1087 { "sw-", "-Latn" }, // Swahili 1088 { "swc-", "-Latn" }, // Congo Swahili 1089 { "ta-", "-Taml" }, // Tamil 1090 { "te-", "-Telu" }, // Telugu 1091 { "teo-", "-Latn" }, // Teso 1092 { "tg-", "-Cyrl" }, // Tajik 1093 { "th-", "-Thai" }, // Thai 1094 { "ti-", "-Ethi" }, // Tigrinya 1095 { "tk-", "-Latn" }, // Turkmen 1096 { "tn-", "-Latn" }, // Tswana 1097 { "to-", "-Latn" }, // Tonga of Tonga Islands 1098 { "tr-", "-Latn" }, // Turkish 1099 { "twq-", "-Latn" }, // Tasawaq 1100 { "tzm-", "-Latn" }, // Central Morocco Tamazight 1101 { "uk-", "-Cyrl" }, // Ukrainian 1102 { "ur-", "-Arab" }, // Urdu 1103 { "uz-", "-Cyrl" }, // Uzbek 1104 { "vai-", "-Vaii" }, // Vai 1105 { "vi-", "-Latn" }, // Vietnamese 1106 { "vun-", "-Latn" }, // Vunjo 1107 { "wo-", "-Latn" }, // Wolof 1108 { "xh-", "-Latn" }, // Xhosa 1109 { "xog-", "-Latn" }, // Soga 1110 { "yav-", "-Latn" }, // Yangben 1111 { "yi-", "-Hebr" }, // Yiddish 1112 { "yo-", "-Latn" }, // Yoruba 1113 { "zh-", "-Hani" }, // Chinese (? not Suppress-Script) 1114 { "zu-", "-Latn" }, // Zulu 1115 }; 1116 enum { 1117 kNumLocaleStringPrefixToDefaults = sizeof(localeStringPrefixToDefaults)/sizeof(KeyStringToResultString) 1118 }; 1119 1120 static const KeyStringToResultString appleLocaleToLanguageString[] = { 1121 // Map locale strings that Apple uses as language IDs to real language strings. 1122 // Must be sorted according to how strcmp compares the strings in the first column. 
1123 // Note: Now we remove all transforms of the form ll_RR -> ll-RR, they are now 1124 // handled in the code. <1.19> 1125 // 1126 // locale lang [ comment ] 1127 // string string 1128 // ------- ------- 1129 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] 1130 { "zh_CN", "zh-Hans" }, // mainland China => simplified 1131 { "zh_HK", "zh-HK" }, // Hong Kong => traditional, not currently used 1132 { "zh_MO", "zh-MO" }, // Macao => traditional, not currently used 1133 { "zh_SG", "zh-SG" }, // Singapore => simplified, not currently used 1134 { "zh_TW", "zh-Hant" }, // Taiwan => traditional 1135 }; 1136 enum { 1137 kNumAppleLocaleToLanguageString = sizeof(appleLocaleToLanguageString)/sizeof(KeyStringToResultString) 1138 }; 1139 1140 /* 1141 static const KeyStringToResultString appleLocaleToLanguageStringForCFBundle[] = { 1142 // Map locale strings that Apple uses as language IDs to real language strings. 1143 // Must be sorted according to how strcmp compares the strings in the first column. 1144 // 1145 // locale lang [ comment ] 1146 // string string 1147 // ------- ------- 1148 { "de_AT", "de-AT" }, // Austrian German 1149 { "de_CH", "de-CH" }, // Swiss German 1150 // { "de_DE", "de-DE" }, // German for Germany (default), not currently used 1151 { "en_AU", "en-AU" }, // Australian English 1152 { "en_CA", "en-CA" }, // Canadian English 1153 { "en_GB", "en-GB" }, // British English 1154 // { "en_IE", "en-IE" }, // Irish English, not currently used 1155 { "en_US", "en-US" }, // U.S. 
English 1156 { "en_US_POSIX", "en-US-POSIX" }, // POSIX locale, need as language string // <1.17> [3840752] 1157 // { "fr_BE", "fr-BE" }, // Belgian French, not currently used 1158 { "fr_CA", "fr-CA" }, // Canadian French 1159 { "fr_CH", "fr-CH" }, // Swiss French 1160 // { "fr_FR", "fr-FR" }, // French for France (default), not currently used 1161 { "nl_BE", "nl-BE" }, // Flemish = Vlaams, Dutch for Belgium 1162 // { "nl_NL", "nl-NL" }, // Dutch for Netherlands (default), not currently used 1163 { "pt_BR", "pt-BR" }, // Brazilian Portuguese 1164 { "pt_PT", "pt-PT" }, // Portuguese for Portugal 1165 { "zh_CN", "zh-Hans" }, // mainland China => simplified 1166 { "zh_HK", "zh-Hant" }, // Hong Kong => traditional, not currently used 1167 { "zh_MO", "zh-Hant" }, // Macao => traditional, not currently used 1168 { "zh_SG", "zh-Hans" }, // Singapore => simplified, not currently used 1169 { "zh_TW", "zh-Hant" }, // Taiwan => traditional 1170 }; 1171 enum { 1172 kNumAppleLocaleToLanguageStringForCFBundle = sizeof(appleLocaleToLanguageStringForCFBundle)/sizeof(KeyStringToResultString) 1173 }; 1174 */ 1175 1176 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 1177 1178 struct LocaleToLegacyCodes { 1179 const char * locale; // reduced to language plus one other component (script, region, variant), separators normalized to '_' 1180 RegionCode regCode; 1181 LangCode langCode; 1182 CFStringEncoding encoding; 1183 }; 1184 typedef struct LocaleToLegacyCodes LocaleToLegacyCodes; 1185 1186 static const LocaleToLegacyCodes localeToLegacyCodes[] = { 1187 // locale RegionCode LangCode CFStringEncoding 1188 { "af"/*ZA*/, 102/*verAfrikaans*/, 141/*langAfrikaans*/, 0/*Roman*/ }, // Latn 1189 { "am", -1, 85/*langAmharic*/, 28/*Ethiopic*/ }, // Ethi 1190 { "ar", 16/*verArabic*/, 12/*langArabic*/, 4/*Arabic*/ }, // Arab; 1191 { "as", -1, 68/*langAssamese*/, 13/*Bengali*/ }, // Beng; 1192 { "ay", -1, 134/*langAymara*/, 0/*Roman*/ 
}, // Latn; 1193 { "az", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // "az" defaults to -Latn 1194 { "az_Arab", -1, 50/*langAzerbaijanAr*/, 4/*Arabic*/ }, // Arab; 1195 { "az_Cyrl", -1, 49/*langAzerbaijani*/, 7/*Cyrillic*/ }, // Cyrl; 1196 { "az_Latn", -1, 150/*langAzerbaijanRoman*/, 0/*Roman*/ }, // Latn; 1197 { "be"/*BY*/, 61/*verBelarus*/, 46/*langBelorussian*/, 7/*Cyrillic*/ }, // Cyrl; 1198 { "bg"/*BG*/, 72/*verBulgaria*/, 44/*langBulgarian*/, 7/*Cyrillic*/ }, // Cyrl; 1199 { "bn", 60/*verBengali*/, 67/*langBengali*/, 13/*Bengali*/ }, // Beng; 1200 { "bo", 105/*verTibetan*/, 63/*langTibetan*/, 26/*Tibetan*/ }, // Tibt; 1201 { "br", 77/*verBreton*/, 142/*langBreton*/, 39/*Celtic*/ }, // Latn; 1202 { "ca"/*ES*/, 73/*verCatalonia*/, 130/*langCatalan*/, 0/*Roman*/ }, // Latn; 1203 { "cs"/*CZ*/, 56/*verCzech*/, 38/*langCzech*/, 29/*CentralEurRoman*/ }, // Latn; 1204 { "cy", 79/*verWelsh*/, 128/*langWelsh*/, 39/*Celtic*/ }, // Latn; 1205 { "da"/*DK*/, 9/*verDenmark*/, 7/*langDanish*/, 0/*Roman*/ }, // Latn; 1206 { "de", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, // assume "de" defaults to verGermany 1207 { "de_1996", 70/*verGermanReformed*/, 2/*langGerman*/, 0/*Roman*/ }, 1208 { "de_AT", 92/*verAustria*/, 2/*langGerman*/, 0/*Roman*/ }, 1209 { "de_CH", 19/*verGrSwiss*/, 2/*langGerman*/, 0/*Roman*/ }, 1210 { "de_DE", 3/*verGermany*/, 2/*langGerman*/, 0/*Roman*/ }, 1211 { "dz"/*BT*/, 83/*verBhutan*/, 137/*langDzongkha*/, 26/*Tibetan*/ }, // Tibt; 1212 { "el", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // assume "el" defaults to verGreece 1213 { "el_CY", 23/*verCyprus*/, 14/*langGreek*/, 6/*Greek*/ }, 1214 { "el_GR", 20/*verGreece*/, 14/*langGreek*/, 6/*Greek*/ }, // modern monotonic 1215 { "en", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, // "en" defaults to verUS (per Chris Hansten) 1216 { "en_001", 37/*verInternational*/, 0/*langEnglish*/, 0/*Roman*/ }, 1217 { "en_AU", 15/*verAustralia*/, 0/*langEnglish*/, 0/*Roman*/ }, 1218 { "en_CA", 
82/*verEngCanada*/, 0/*langEnglish*/, 0/*Roman*/ }, 1219 { "en_GB", 2/*verBritain*/, 0/*langEnglish*/, 0/*Roman*/ }, 1220 { "en_IE", 108/*verIrelandEnglish*/, 0/*langEnglish*/, 0/*Roman*/ }, 1221 { "en_SG", 100/*verSingapore*/, 0/*langEnglish*/, 0/*Roman*/ }, 1222 { "en_US", 0/*verUS*/, 0/*langEnglish*/, 0/*Roman*/ }, 1223 { "eo", 103/*verEsperanto*/, 94/*langEsperanto*/, 0/*Roman*/ }, // Latn; 1224 { "es", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, // "es" defaults to verSpain (per Chris Hansten) 1225 { "es_419", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, // new BCP 47 tag 1226 { "es_ES", 8/*verSpain*/, 6/*langSpanish*/, 0/*Roman*/ }, 1227 { "es_MX", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, 1228 { "es_US", 86/*verSpLatinAmerica*/, 6/*langSpanish*/, 0/*Roman*/ }, 1229 { "et"/*EE*/, 44/*verEstonia*/, 27/*langEstonian*/, 29/*CentralEurRoman*/ }, 1230 { "eu", -1, 129/*langBasque*/, 0/*Roman*/ }, // Latn; 1231 { "fa"/*IR*/, 48/*verIran*/, 31/*langFarsi/Persian*/, 0x8C/*Farsi*/ }, // Arab; 1232 { "fi"/*FI*/, 17/*verFinland*/, 13/*langFinnish*/, 0/*Roman*/ }, 1233 { "fil", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; 1234 { "fo"/*FO*/, 47/*verFaroeIsl*/, 30/*langFaroese*/, 37/*Icelandic*/ }, 1235 { "fr", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, // "fr" defaults to verFrance (per Chris Hansten) 1236 { "fr_001", 91/*verFrenchUniversal*/, 1/*langFrench*/, 0/*Roman*/ }, 1237 { "fr_BE", 98/*verFrBelgium*/, 1/*langFrench*/, 0/*Roman*/ }, 1238 { "fr_CA", 11/*verFrCanada*/, 1/*langFrench*/, 0/*Roman*/ }, 1239 { "fr_CH", 18/*verFrSwiss*/, 1/*langFrench*/, 0/*Roman*/ }, 1240 { "fr_FR", 1/*verFrance*/, 1/*langFrench*/, 0/*Roman*/ }, 1241 { "ga"/*IE*/, 50/*verIreland*/, 35/*langIrishGaelic*/, 0/*Roman*/ }, // no dots (h after) 1242 { "ga_Latg"/*IE*/, 81/*verIrishGaelicScrip*/, 146/*langIrishGaelicScript*/, 40/*Gaelic*/ }, // using dots 1243 { "gd", 75/*verScottishGaelic*/, 144/*langScottishGaelic*/, 39/*Celtic*/ }, 1244 { "gl", -1, 
140/*langGalician*/, 0/*Roman*/ }, // Latn; 1245 { "gn", -1, 133/*langGuarani*/, 0/*Roman*/ }, // Latn; 1246 { "grc", 40/*verGreekAncient*/, 148/*langGreekAncient*/, 6/*Greek*/ }, // polytonic (MacGreek doesn't actually support it) 1247 { "gu"/*IN*/, 94/*verGujarati*/, 69/*langGujarati*/, 11/*Gujarati*/ }, // Gujr; 1248 { "gv", 76/*verManxGaelic*/, 145/*langManxGaelic*/, 39/*Celtic*/ }, // Latn; 1249 { "he"/*IL*/, 13/*verIsrael*/, 10/*langHebrew*/, 5/*Hebrew*/ }, // Hebr; 1250 { "hi"/*IN*/, 33/*verIndiaHindi*/, 21/*langHindi*/, 9/*Devanagari*/ }, // Deva; 1251 { "hr"/*HR*/, 68/*verCroatia*/, 18/*langCroatian*/, 36/*Croatian*/ }, 1252 { "hu"/*HU*/, 43/*verHungary*/, 26/*langHungarian*/, 29/*CentralEurRoman*/ }, 1253 { "hy"/*AM*/, 84/*verArmenian*/, 51/*langArmenian*/, 24/*Armenian*/ }, // Armn; 1254 { "id", -1, 81/*langIndonesian*/, 0/*Roman*/ }, // Latn; 1255 { "is"/*IS*/, 21/*verIceland*/, 15/*langIcelandic*/, 37/*Icelandic*/ }, 1256 { "it", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, // "it" defaults to verItaly 1257 { "it_CH", 36/*verItalianSwiss*/, 3/*langItalian*/, 0/*Roman*/ }, 1258 { "it_IT", 4/*verItaly*/, 3/*langItalian*/, 0/*Roman*/ }, 1259 { "iu"/*CA*/, 78/*verNunavut*/, 143/*langInuktitut*/, 0xEC/*Inuit*/ }, // Cans; 1260 { "ja"/*JP*/, 14/*verJapan*/, 11/*langJapanese*/, 1/*Japanese*/ }, // Jpan; 1261 { "jv", -1, 138/*langJavaneseRom*/, 0/*Roman*/ }, // Latn; 1262 { "ka"/*GE*/, 85/*verGeorgian*/, 52/*langGeorgian*/, 23/*Georgian*/ }, // Geor; 1263 { "kk", -1, 48/*langKazakh*/, 7/*Cyrillic*/ }, // "kk" defaults to -Cyrl; also have -Latn, -Arab 1264 { "kl", 107/*verGreenland*/, 149/*langGreenlandic*/, 0/*Roman*/ }, // Latn; 1265 { "km", -1, 78/*langKhmer*/, 20/*Khmer*/ }, // Khmr; 1266 { "kn", -1, 73/*langKannada*/, 16/*Kannada*/ }, // Knda; 1267 { "ko"/*KR*/, 51/*verKorea*/, 23/*langKorean*/, 3/*Korean*/ }, // Hang; 1268 { "ks", -1, 61/*langKashmiri*/, 4/*Arabic*/ }, // Arab; 1269 { "ku", -1, 60/*langKurdish*/, 4/*Arabic*/ }, // Arab; 1270 { "ky", -1, 
54/*langKirghiz*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab 1271 { "la", -1, 131/*langLatin*/, 0/*Roman*/ }, // Latn; 1272 { "lo", -1, 79/*langLao*/, 22/*Laotian*/ }, // Laoo; 1273 { "lt"/*LT*/, 41/*verLithuania*/, 24/*langLithuanian*/, 29/*CentralEurRoman*/ }, 1274 { "lv"/*LV*/, 45/*verLatvia*/, 28/*langLatvian*/, 29/*CentralEurRoman*/ }, 1275 { "mg", -1, 93/*langMalagasy*/, 0/*Roman*/ }, // Latn; 1276 { "mk"/*MK*/, 67/*verMacedonian*/, 43/*langMacedonian*/, 7/*Cyrillic*/ }, // Cyrl; 1277 { "ml", -1, 72/*langMalayalam*/, 17/*Malayalam*/ }, // Mlym; 1278 { "mn", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // "mn" defaults to -Cyrl 1279 { "mn_Cyrl", -1, 58/*langMongolianCyr*/, 7/*Cyrillic*/ }, // Cyrl; 1280 { "mn_Mong", -1, 57/*langMongolian*/, 27/*Mongolian*/ }, // Mong; 1281 { "mo", -1, 53/*langMoldavian*/, 7/*Cyrillic*/ }, // Cyrl; 1282 { "mr"/*IN*/, 104/*verMarathi*/, 66/*langMarathi*/, 9/*Devanagari*/ }, // Deva; 1283 { "ms", -1, 83/*langMalayRoman*/, 0/*Roman*/ }, // "ms" defaults to -Latn; 1284 { "ms_Arab", -1, 84/*langMalayArabic*/, 4/*Arabic*/ }, // Arab; 1285 { "mt"/*MT*/, 22/*verMalta*/, 16/*langMaltese*/, 0/*Roman*/ }, // Latn; 1286 { "mul", 74/*verMultilingual*/, -1, 0 }, 1287 { "my", -1, 77/*langBurmese*/, 19/*Burmese*/ }, // Mymr; 1288 { "nb"/*NO*/, 12/*verNorway*/, 9/*langNorwegian*/, 0/*Roman*/ }, 1289 { "ne"/*NP*/, 106/*verNepal*/, 64/*langNepali*/, 9/*Devanagari*/ }, // Deva; 1290 { "nl", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, // "nl" defaults to verNetherlands 1291 { "nl_BE", 6/*verFlemish*/, 34/*langFlemish*/, 0/*Roman*/ }, 1292 { "nl_NL", 5/*verNetherlands*/, 4/*langDutch*/, 0/*Roman*/ }, 1293 { "nn"/*NO*/, 101/*verNynorsk*/, 151/*langNynorsk*/, 0/*Roman*/ }, 1294 { "ny", -1, 92/*langNyanja/Chewa*/, 0/*Roman*/ }, // Latn; 1295 { "om", -1, 87/*langOromo*/, 28/*Ethiopic*/ }, // Ethi; 1296 { "or", -1, 71/*langOriya*/, 12/*Oriya*/ }, // Orya; 1297 { "pa", 95/*verPunjabi*/, 70/*langPunjabi*/, 10/*Gurmukhi*/ }, // Guru; 1298 { 
"pl"/*PL*/, 42/*verPoland*/, 25/*langPolish*/, 29/*CentralEurRoman*/ }, 1299 { "ps", -1, 59/*langPashto*/, 0x8C/*Farsi*/ }, // Arab; 1300 { "pt", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, // "pt" defaults to verBrazil (per Chris Hansten) 1301 { "pt_BR", 71/*verBrazil*/, 8/*langPortuguese*/, 0/*Roman*/ }, 1302 { "pt_PT", 10/*verPortugal*/, 8/*langPortuguese*/, 0/*Roman*/ }, 1303 { "qu", -1, 132/*langQuechua*/, 0/*Roman*/ }, // Latn; 1304 { "rn", -1, 91/*langRundi*/, 0/*Roman*/ }, // Latn; 1305 { "ro"/*RO*/, 39/*verRomania*/, 37/*langRomanian*/, 38/*Romanian*/ }, 1306 { "ru"/*RU*/, 49/*verRussia*/, 32/*langRussian*/, 7/*Cyrillic*/ }, // Cyrl; 1307 { "rw", -1, 90/*langKinyarwanda*/, 0/*Roman*/ }, // Latn; 1308 { "sa", -1, 65/*langSanskrit*/, 9/*Devanagari*/ }, // Deva; 1309 { "sd", -1, 62/*langSindhi*/, 0x8C/*Farsi*/ }, // Arab; 1310 { "se", 46/*verSami*/, 29/*langSami*/, 0/*Roman*/ }, 1311 { "si", -1, 76/*langSinhalese*/, 18/*Sinhalese*/ }, // Sinh; 1312 { "sk"/*SK*/, 57/*verSlovak*/, 39/*langSlovak*/, 29/*CentralEurRoman*/ }, 1313 { "sl"/*SI*/, 66/*verSlovenian*/, 40/*langSlovenian*/, 36/*Croatian*/ }, 1314 { "so", -1, 88/*langSomali*/, 0/*Roman*/ }, // Latn; 1315 { "sq", -1, 36/*langAlbanian*/, 0/*Roman*/ }, 1316 { "sr"/*CS,RS*/, 65/*verSerbian*/, 42/*langSerbian*/, 7/*Cyrillic*/ }, // Cyrl; 1317 { "su", -1, 139/*langSundaneseRom*/, 0/*Roman*/ }, // Latn; 1318 { "sv"/*SE*/, 7/*verSweden*/, 5/*langSwedish*/, 0/*Roman*/ }, 1319 { "sw", -1, 89/*langSwahili*/, 0/*Roman*/ }, // Latn; 1320 { "ta", -1, 74/*langTamil*/, 14/*Tamil*/ }, // Taml; 1321 { "te", -1, 75/*langTelugu*/, 15/*Telugu*/ }, // Telu 1322 { "tg", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // "tg" defaults to "Cyrl" 1323 { "tg_Cyrl", -1, 55/*langTajiki*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab 1324 { "th"/*TH*/, 54/*verThailand*/, 22/*langThai*/, 21/*Thai*/ }, // Thai; 1325 { "ti", -1, 86/*langTigrinya*/, 28/*Ethiopic*/ }, // Ethi; 1326 { "tk", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // "tk" 
defaults to Cyrl 1327 { "tk_Cyrl", -1, 56/*langTurkmen*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab 1328 { "tl", -1, 82/*langTagalog*/, 0/*Roman*/ }, // Latn; 1329 { "to"/*TO*/, 88/*verTonga*/, 147/*langTongan*/, 0/*Roman*/ }, // Latn; 1330 { "tr"/*TR*/, 24/*verTurkey*/, 17/*langTurkish*/, 35/*Turkish*/ }, // Latn; 1331 { "tt", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; 1332 { "tt_Cyrl", -1, 135/*langTatar*/, 7/*Cyrillic*/ }, // Cyrl; 1333 { "ug", -1, 136/*langUighur*/, 4/*Arabic*/ }, // Arab; 1334 { "uk"/*UA*/, 62/*verUkraine*/, 45/*langUkrainian*/, 7/*Cyrillic*/ }, // Cyrl; 1335 { "und", 55/*verScriptGeneric*/, -1, 0 }, 1336 { "ur", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // "ur" defaults to verPakistanUrdu 1337 { "ur_IN", 96/*verIndiaUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab 1338 { "ur_PK", 34/*verPakistanUrdu*/, 20/*langUrdu*/, 0x8C/*Farsi*/ }, // Arab 1339 { "uz"/*UZ*/, 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, // Cyrl; also -Latn, -Arab 1340 { "uz_Cyrl", 99/*verUzbek*/, 47/*langUzbek*/, 7/*Cyrillic*/ }, 1341 { "vi"/*VN*/, 97/*verVietnam*/, 80/*langVietnamese*/, 30/*Vietnamese*/ }, // Latn 1342 { "yi", -1, 41/*langYiddish*/, 5/*Hebrew*/ }, // Hebr; 1343 { "zh", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, // "zh" defaults to verChina, langSimpChinese 1344 { "zh_CN", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, 1345 { "zh_HK", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, 1346 { "zh_Hans", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, 1347 { "zh_Hant", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, 1348 { "zh_MO", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, 1349 { "zh_SG", 52/*verChina*/, 33/*langSimpChinese*/, 25/*ChineseSimp*/ }, 1350 { "zh_TW", 53/*verTaiwan*/, 19/*langTradChinese*/, 2/*ChineseTrad*/ }, 1351 }; 1352 enum { 1353 kNumLocaleToLegacyCodes = sizeof(localeToLegacyCodes)/sizeof(localeToLegacyCodes[0]) 1354 }; 1355 1356 #endif 
1357 1358 /* 1359 For reference here is a list of ICU locales with variants and how some 1360 of them are canonicalized with the ICU function uloc_canonicalize: 1361 1362 ICU 3.0 has: 1363 en_US_POSIX x no change 1364 hy_AM_REVISED x no change 1365 ja_JP_TRADITIONAL -> ja_JP@calendar=japanese 1366 th_TH_TRADITIONAL -> th_TH@calendar=buddhist 1367 1368 ICU 2.8 also had the following (now obsolete): 1369 ca_ES_PREEURO 1370 de__PHONEBOOK -> de@collation=phonebook 1371 de_AT_PREEURO 1372 de_DE_PREEURO 1373 de_LU_PREEURO 1374 el_GR_PREEURO 1375 en_BE_PREEURO 1376 en_GB_EURO -> en_GB@currency=EUR 1377 en_IE_PREEURO -> en_IE@currency=IEP 1378 es__TRADITIONAL -> es@collation=traditional 1379 es_ES_PREEURO 1380 eu_ES_PREEURO 1381 fi_FI_PREEURO 1382 fr_BE_PREEURO 1383 fr_FR_PREEURO -> fr_FR@currency=FRF 1384 fr_LU_PREEURO 1385 ga_IE_PREEURO 1386 gl_ES_PREEURO 1387 hi__DIRECT -> hi@collation=direct 1388 it_IT_PREEURO 1389 nl_BE_PREEURO 1390 nl_NL_PREEURO 1391 pt_PT_PREEURO 1392 zh__PINYIN -> zh@collation=pinyin 1393 zh_TW_STROKE -> zh_TW@collation=stroke 1394 1395 */ 1396 1397 // _CompareTestEntryToTableEntryKey 1398 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1399 // comparison function for bsearch 1400 static int _CompareTestEntryToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { 1401 return strcmp( ((const KeyStringToResultString *)testEntryPtr)->key, ((const KeyStringToResultString *)tableEntryKeyPtr)->key ); 1402 } 1403 1404 // _CompareTestEntryPrefixToTableEntryKey 1405 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1406 // Comparison function for bsearch. Assumes prefix IS terminated with '-' or '_'. 1407 // Do the following instead of strlen & strncmp so we don't walk tableEntry key twice. 
1408 static int _CompareTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { 1409 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; 1410 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; 1411 1412 while ( *testPtr == *tablePtr && *tablePtr != 0 ) { 1413 testPtr++; tablePtr++; 1414 } 1415 if ( *tablePtr != 0 ) { 1416 // strings are different, and the string in the table has not run out; 1417 // i.e. the table entry is not a prefix of the text string. 1418 return ( *testPtr < *tablePtr )? -1: 1; 1419 } 1420 return 0; 1421 } 1422 1423 // _CompareLowerTestEntryPrefixToTableEntryKey 1424 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1425 // Comparison function for bsearch. Assumes prefix NOT terminated with '-' or '_'. 1426 // Lowercases the test string before comparison (the table should already have lowercased entries). 1427 static int _CompareLowerTestEntryPrefixToTableEntryKey(const void *testEntryPtr, const void *tableEntryKeyPtr) { 1428 const char * testPtr = ((const KeyStringToResultString *)testEntryPtr)->key; 1429 const char * tablePtr = ((const KeyStringToResultString *)tableEntryKeyPtr)->key; 1430 char lowerTestChar; 1431 1432 while ( (lowerTestChar = tolower(*testPtr)) == *tablePtr && *tablePtr != 0 && lowerTestChar != '_' ) { // <1.9> 1433 testPtr++; tablePtr++; 1434 } 1435 if ( *tablePtr != 0 ) { 1436 // strings are different, and the string in the table has not run out; 1437 // i.e. the table entry is not a prefix of the text string. 1438 if (lowerTestChar == '_') // <1.9> 1439 return -1; // <1.9> 1440 return ( lowerTestChar < *tablePtr )? -1: 1; 1441 } 1442 // The string in the table has run out. If the test string char is not alnum, 1443 // then the string matches, else the test string sorts after. 1444 return ( !isalnum(lowerTestChar) )? 
0: 1; 1445 } 1446 1447 // _DeleteCharsAtPointer 1448 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1449 // remove _length_ characters from the beginning of the string indicated by _stringPtr_ 1450 // (we know that the string has at least _length_ characters in it) 1451 static void _DeleteCharsAtPointer(char *stringPtr, int length) { 1452 do { 1453 *stringPtr = stringPtr[length]; 1454 } while (*stringPtr++ != 0); 1455 } 1456 1457 // _CopyReplacementAtPointer 1458 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1459 // Copy replacement string (*excluding* terminating NULL byte) to the place indicated by stringPtr 1460 static void _CopyReplacementAtPointer(char *stringPtr, const char *replacementPtr) { 1461 while (*replacementPtr != 0) { 1462 *stringPtr++ = *replacementPtr++; 1463 } 1464 } 1465 1466 // _CheckForTag 1467 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1468 static Boolean _CheckForTag(const char *localeStringPtr, const char *tagPtr, int tagLen) { 1469 return ( strncmp(localeStringPtr, tagPtr, tagLen) == 0 && !isalnum(localeStringPtr[tagLen]) ); 1470 } 1471 1472 // _ReplacePrefix 1473 // Move this code from _UpdateFullLocaleString into separate function // <1.10> 1474 static void _ReplacePrefix(char locString[], int locStringMaxLen, int oldPrefixLen, const char *newPrefix) { 1475 int newPrefixLen = strlen(newPrefix); 1476 int lengthDelta = newPrefixLen - oldPrefixLen; 1477 1478 if (lengthDelta < 0) { 1479 // replacement is shorter, delete chars by shifting tail of string 1480 _DeleteCharsAtPointer(locString + newPrefixLen, -lengthDelta); 1481 } else if (lengthDelta > 0) { 1482 // replacement is longer... 
1483 int stringLen = strlen(locString); 1484 1485 if (stringLen + lengthDelta < locStringMaxLen) { 1486 // make room by shifting tail of string 1487 char * tailShiftPtr = locString + stringLen; 1488 char * tailStartPtr = locString + oldPrefixLen; // pointer to tail of string to shift 1489 1490 while (tailShiftPtr >= tailStartPtr) { 1491 tailShiftPtr[lengthDelta] = *tailShiftPtr; 1492 tailShiftPtr--; 1493 } 1494 } else { 1495 // no room, can't do substitution 1496 newPrefix = NULL; 1497 } 1498 } 1499 1500 if (newPrefix) { 1501 // do the substitution 1502 _CopyReplacementAtPointer(locString, newPrefix); 1503 } 1504 } 1505 1506 // _UpdateFullLocaleString 1507 // Given a locale string that uses standard codes (not a special old-style Apple string), 1508 // update all the language codes and region codes to latest versions, map 3-letter 1509 // language codes to 2-letter codes if possible, and normalize casing. If requested, return 1510 // pointers to a language-region variant subtag (if present) and a region tag (if present). 1511 // (add locStringMaxLen parameter) // <1.10> 1512 static void _UpdateFullLocaleString(char inLocaleString[], int locStringMaxLen, 1513 char **langRegSubtagRef, char **regionTagRef, 1514 char varKeyValueString[]) // <1.17> 1515 { 1516 KeyStringToResultString testEntry; 1517 KeyStringToResultString * foundEntry; 1518 const SpecialCaseUpdates * specialCasePtr; 1519 char * inLocalePtr; 1520 char * subtagPtr; 1521 char * langRegSubtag = NULL; 1522 char * regionTag = NULL; 1523 char * variantTag = NULL; 1524 Boolean subtagHasDigits, pastPrimarySubtag, hadRegion; 1525 1526 // 1. First replace any non-canonical prefix (case insensitive) with canonical 1527 // (change 3-letter ISO 639 code to 2-letter, update obsolete ISO 639 codes & RFC 3066 tags, etc.) 
1528 1529 testEntry.key = inLocaleString; 1530 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToCanonical, kNumLocaleStringPrefixToCanonical, 1531 sizeof(KeyStringToResultString), _CompareLowerTestEntryPrefixToTableEntryKey ); 1532 if (foundEntry) { 1533 // replace key (at beginning of string) with result 1534 _ReplacePrefix(inLocaleString, locStringMaxLen, strlen(foundEntry->key), foundEntry->result); // <1.10> 1535 } 1536 1537 // 2. Walk through input string, normalizing case & marking use of ISO 3166 codes 1538 1539 inLocalePtr = inLocaleString; 1540 subtagPtr = inLocaleString; 1541 subtagHasDigits = false; 1542 pastPrimarySubtag = false; 1543 hadRegion = false; 1544 1545 while ( true ) { 1546 if ( isalpha(*inLocalePtr) ) { 1547 // if not past a region tag, then lowercase, else uppercase 1548 *inLocalePtr = (!hadRegion)? tolower(*inLocalePtr): toupper(*inLocalePtr); 1549 } else if ( isdigit(*inLocalePtr) ) { 1550 subtagHasDigits = true; 1551 } else { 1552 1553 if (!pastPrimarySubtag) { 1554 // may have a NULL primary subtag 1555 if (subtagHasDigits) { 1556 break; 1557 } 1558 pastPrimarySubtag = true; 1559 } else if (!hadRegion) { 1560 // We are after any primary language subtag, but not past any region tag. 1561 // This subtag is preceded by '-' or '_'. 
1562 int subtagLength = inLocalePtr - subtagPtr; // includes leading '-' or '_' 1563 1564 if (subtagLength == 3 && !subtagHasDigits) { 1565 // potential ISO 3166 code for region or language variant; if so, needs uppercasing 1566 if (*subtagPtr == '_') { 1567 regionTag = subtagPtr; 1568 hadRegion = true; 1569 subtagPtr[1] = toupper(subtagPtr[1]); 1570 subtagPtr[2] = toupper(subtagPtr[2]); 1571 } else if (langRegSubtag == NULL) { 1572 langRegSubtag = subtagPtr; 1573 subtagPtr[1] = toupper(subtagPtr[1]); 1574 subtagPtr[2] = toupper(subtagPtr[2]); 1575 } 1576 } else if (subtagLength == 4 && subtagHasDigits) { 1577 // potential UN M.49 region code 1578 if (*subtagPtr == '_') { 1579 regionTag = subtagPtr; 1580 hadRegion = true; 1581 } else if (langRegSubtag == NULL) { 1582 langRegSubtag = subtagPtr; 1583 } 1584 } else if (subtagLength == 5 && !subtagHasDigits) { 1585 // ISO 15924 script code, uppercase just the first letter 1586 subtagPtr[1] = toupper(subtagPtr[1]); 1587 } else if (subtagLength == 1 && *subtagPtr == '_') { // <1.17> 1588 hadRegion = true; 1589 } 1590 1591 if (!hadRegion) { 1592 // convert improper '_' to '-' 1593 *subtagPtr = '-'; 1594 } 1595 } else { 1596 variantTag = subtagPtr; // <1.17> 1597 } 1598 1599 if (*inLocalePtr == '-' || *inLocalePtr == '_') { 1600 subtagPtr = inLocalePtr; 1601 subtagHasDigits = false; 1602 } else { 1603 break; 1604 } 1605 } 1606 1607 inLocalePtr++; 1608 } 1609 1610 // 3 If there is a variant tag, see if ICU canonicalizes it to keywords. // <1.17> [3577669] 1611 // If so, copy the keywords to varKeyValueString and delete the variant tag 1612 // from the original string (but don't otherwise use the ICU canonicalization). 
1613 varKeyValueString[0] = 0; 1614 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 1615 if (variantTag) { 1616 UErrorCode icuStatus; 1617 int icuCanonStringLen; 1618 char * varKeyValueStringPtr = varKeyValueString; 1619 1620 icuStatus = U_ZERO_ERROR; 1621 icuCanonStringLen = uloc_canonicalize( inLocaleString, varKeyValueString, locStringMaxLen, &icuStatus ); 1622 if ( U_SUCCESS(icuStatus) ) { 1623 char * icuCanonStringPtr = varKeyValueString; 1624 1625 if (icuCanonStringLen >= locStringMaxLen) 1626 icuCanonStringLen = locStringMaxLen - 1; 1627 varKeyValueString[icuCanonStringLen] = 0; 1628 while (*icuCanonStringPtr != 0 && *icuCanonStringPtr != ULOC_KEYWORD_SEPARATOR) 1629 ++icuCanonStringPtr; 1630 if (*icuCanonStringPtr != 0) { 1631 // the canonicalized string has keywords 1632 // delete the variant tag in the original string (and other trailing '_' or '-') 1633 *variantTag-- = 0; 1634 while (*variantTag == '_') 1635 *variantTag-- = 0; 1636 // delete all of the canonicalized string except the keywords 1637 while (*icuCanonStringPtr != 0) 1638 *varKeyValueStringPtr++ = *icuCanonStringPtr++; 1639 } 1640 *varKeyValueStringPtr = 0; 1641 } 1642 } 1643 #endif 1644 1645 // 4. Handle special cases of updating region codes, or updating language codes based on 1646 // region code. 1647 for (specialCasePtr = specialCases; specialCasePtr->reg1 != NULL; specialCasePtr++) { 1648 if ( specialCasePtr->lang == NULL || _CheckForTag(inLocaleString, specialCasePtr->lang, 2) ) { 1649 // OK, we matched any language specified. Now what needs updating? 
1650 char * foundTag; 1651 1652 if ( isupper(specialCasePtr->update1[0]) ) { 1653 // updating a region code 1654 if ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) { 1655 _CopyReplacementAtPointer(foundTag+1, specialCasePtr->update1); 1656 } 1657 if ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) { 1658 _CopyReplacementAtPointer(regionTag+1, specialCasePtr->update1); 1659 } 1660 1661 } else { 1662 // updating the language, there will be two choices based on region 1663 if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg1 + 1, 2) ) || 1664 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg1) ) && !isalnum(foundTag[3]) ) ) { 1665 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update1); 1666 } else if ( ( regionTag && _CheckForTag(regionTag+1, specialCasePtr->reg2 + 1, 2) ) || 1667 ( ( foundTag = strstr(inLocaleString, specialCasePtr->reg2) ) && !isalnum(foundTag[3]) ) ) { 1668 _CopyReplacementAtPointer(inLocaleString, specialCasePtr->update2); 1669 } 1670 } 1671 } 1672 } 1673 1674 // 5. return pointers if requested. 
1675 if (langRegSubtagRef != NULL) { 1676 *langRegSubtagRef = langRegSubtag; 1677 } 1678 if (regionTagRef != NULL) { 1679 *regionTagRef = regionTag; 1680 } 1681 } 1682 1683 1684 // _RemoveSubstringsIfPresent 1685 // (Local function for CFLocaleCreateCanonicalLocaleIdentifierFromString) 1686 // substringList is a list of space-separated substrings to strip if found in localeString 1687 static void _RemoveSubstringsIfPresent(char *localeString, const char *substringList) { 1688 while (*substringList != 0) { 1689 char currentSubstring[kLocaleIdentifierCStringMax]; 1690 int substringLength = 0; 1691 char * foundSubstring; 1692 1693 // copy current substring & get its length 1694 while ( isgraph(*substringList) ) { 1695 currentSubstring[substringLength++] = *substringList++; 1696 } 1697 // move to next substring 1698 while ( isspace(*substringList) ) { 1699 substringList++; 1700 } 1701 1702 // search for current substring in locale string 1703 if (substringLength == 0) 1704 continue; 1705 currentSubstring[substringLength] = 0; 1706 foundSubstring = strstr(localeString, currentSubstring); 1707 1708 // if substring is found, delete it 1709 if (foundSubstring) { 1710 _DeleteCharsAtPointer(foundSubstring, substringLength); 1711 } 1712 } 1713 } 1714 1715 1716 // _GetKeyValueString // <1.10> 1717 // Removes any key-value string from inLocaleString, puts canonized version in keyValueString 1718 1719 static void _GetKeyValueString(char inLocaleString[], char keyValueString[]) { 1720 char * inLocalePtr = inLocaleString; 1721 1722 while (*inLocalePtr != 0 && *inLocalePtr != ULOC_KEYWORD_SEPARATOR) { 1723 inLocalePtr++; 1724 } 1725 if (*inLocalePtr != 0) { // we found a key-value section 1726 char * keyValuePtr = keyValueString; 1727 1728 *keyValuePtr = *inLocalePtr; 1729 *inLocalePtr = 0; 1730 do { 1731 if ( *(++inLocalePtr) != ' ' ) { 1732 *(++keyValuePtr) = *inLocalePtr; // remove "tolower() for *inLocalePtr" // <1.11> 1733 } 1734 } while (*inLocalePtr != 0); 1735 } else { 1736 
keyValueString[0] = 0; 1737 } 1738 } 1739 1740 static void _AppendKeyValueString(char inLocaleString[], int locStringMaxLen, char keyValueString[]) { 1741 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 1742 if (keyValueString[0] != 0) { 1743 UErrorCode uerr = U_ZERO_ERROR; 1744 UEnumeration * uenum = uloc_openKeywords(keyValueString, &uerr); 1745 if ( uenum != NULL ) { 1746 const char * keyword; 1747 int32_t length; 1748 char value[ULOC_KEYWORDS_CAPACITY]; // use as max for keyword value 1749 while ( U_SUCCESS(uerr) ) { 1750 keyword = uenum_next(uenum, &length, &uerr); 1751 if ( keyword == NULL ) { 1752 break; 1753 } 1754 length = uloc_getKeywordValue( keyValueString, keyword, value, sizeof(value), &uerr ); 1755 length = uloc_setKeywordValue( keyword, value, inLocaleString, locStringMaxLen, &uerr ); 1756 } 1757 uenum_close(uenum); 1758 } 1759 } 1760 #endif 1761 } 1762 1763 // __private_extern__ CFStringRef _CFLocaleCreateCanonicalLanguageIdentifierForCFBundle(CFAllocatorRef allocator, CFStringRef localeIdentifier) {} 1764 1765 CFStringRef CFLocaleCreateCanonicalLanguageIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { 1766 char inLocaleString[kLocaleIdentifierCStringMax]; 1767 CFStringRef outStringRef = NULL; 1768 1769 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { 1770 KeyStringToResultString testEntry; 1771 KeyStringToResultString * foundEntry; 1772 char keyValueString[sizeof(inLocaleString)]; // <1.10> 1773 char varKeyValueString[sizeof(inLocaleString)]; // <1.17> 1774 1775 _GetKeyValueString(inLocaleString, keyValueString); // <1.10> 1776 testEntry.result = NULL; 1777 1778 // A. 
Special case aa_SAAHO, no_BOKMAL, and no_NYNORSK since they are legacy identifiers that don't follow the normal rules (http://unicode.org/cldr/trac/browser/trunk/common/supplemental/supplementalMetadata.xml) 1779 1780 testEntry.key = inLocaleString; 1781 KeyStringToResultString specialCase = testEntry; 1782 foundEntry = &specialCase; 1783 1784 if (strncmp("aa_SAAHO", testEntry.key, strlen("aa_SAAHO")) == 0) { 1785 foundEntry->result = "ssy"; 1786 } else if (strncmp("no_BOKMAL", testEntry.key, strlen("no_BOKMAL")) == 0) { 1787 foundEntry->result = "nb"; 1788 } else if (strncmp("no_NYNORSK", testEntry.key, strlen("no_NYNORSK")) == 0) { 1789 foundEntry->result = "nn"; 1790 } else { 1791 // B. First check if input string matches an old-style string that has a replacement 1792 // (do this before case normalization) 1793 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, 1794 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); 1795 } 1796 if (foundEntry) { 1797 // It does match, so replace old string with new 1798 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); 1799 varKeyValueString[0] = 0; 1800 } else { 1801 char * langRegSubtag = NULL; 1802 char * regionTag = NULL; 1803 1804 // C. No match with an old-style string, use input string but update codes, normalize case, etc. 1805 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17><1.19> 1806 1807 // if the language part already includes a regional variant, then delete any region tag. <1.19> 1808 if (langRegSubtag && regionTag) 1809 *regionTag = 0; 1810 } 1811 1812 // D. Now we have an up-to-date locale string, but we need to strip defaults and turn it into a language string 1813 1814 // 1. 
Strip defaults in input string based on initial part of locale string 1815 // (mainly to strip default script tag for a language) 1816 testEntry.key = inLocaleString; 1817 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, 1818 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); 1819 if (foundEntry) { 1820 // The input string begins with a character sequence for which 1821 // there are default substrings which should be stripped if present 1822 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); 1823 } 1824 1825 // 2. If the string matches a locale string used by Apple as a language string, turn it into a language string 1826 testEntry.key = inLocaleString; 1827 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, appleLocaleToLanguageString, kNumAppleLocaleToLanguageString, 1828 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); 1829 if (foundEntry) { 1830 // it does match 1831 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); 1832 } else { 1833 // skip to any region tag or java-type variant 1834 char * inLocalePtr = inLocaleString; 1835 while (*inLocalePtr != 0 && *inLocalePtr != '_') { 1836 inLocalePtr++; 1837 } 1838 // if there is still a region tag, turn it into a language variant <1.19> 1839 if (*inLocalePtr == '_') { 1840 // handle 3-digit regions in addition to 2-letter ones 1841 char * regionTag = inLocalePtr++; 1842 long expectedLength = 0; 1843 if ( isalpha(*inLocalePtr) ) { 1844 while ( isalpha(*(++inLocalePtr)) ) 1845 ; 1846 expectedLength = 3; 1847 } else if ( isdigit(*inLocalePtr) ) { 1848 while ( isdigit(*(++inLocalePtr)) ) 1849 ; 1850 expectedLength = 4; 1851 } 1852 *regionTag = (inLocalePtr - regionTag == expectedLength)? '-': 0; 1853 } 1854 // anything else at/after '_' just gets deleted 1855 *inLocalePtr = 0; 1856 } 1857 1858 // E. 
Re-append any key-value strings, now canonical // <1.10><1.17> 1859 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); 1860 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); 1861 1862 // All done, return what we came up with. 1863 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); 1864 } 1865 1866 return outStringRef; 1867 } 1868 1869 1870 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromString(CFAllocatorRef allocator, CFStringRef localeIdentifier) { 1871 char inLocaleString[kLocaleIdentifierCStringMax]; 1872 CFStringRef outStringRef = NULL; 1873 1874 if ( localeIdentifier && CFStringGetCString(localeIdentifier, inLocaleString, sizeof(inLocaleString), kCFStringEncodingASCII) ) { 1875 KeyStringToResultString testEntry; 1876 KeyStringToResultString * foundEntry; 1877 char keyValueString[sizeof(inLocaleString)]; // <1.10> 1878 char varKeyValueString[sizeof(inLocaleString)]; // <1.17> 1879 1880 _GetKeyValueString(inLocaleString, keyValueString); // <1.10> 1881 testEntry.result = NULL; 1882 1883 // A. First check if input string matches an old-style Apple string that has a replacement 1884 // (do this before case normalization) 1885 testEntry.key = inLocaleString; 1886 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, oldAppleLocaleToCanonical, kNumOldAppleLocaleToCanonical, 1887 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); 1888 if (foundEntry) { 1889 // It does match, so replace old string with new // <1.10> 1890 strlcpy(inLocaleString, foundEntry->result, sizeof(inLocaleString)); 1891 varKeyValueString[0] = 0; 1892 } else { 1893 char * langRegSubtag = NULL; 1894 char * regionTag = NULL; 1895 1896 // B. No match with an old-style string, use input string but update codes, normalize case, etc. 
1897 _UpdateFullLocaleString(inLocaleString, sizeof(inLocaleString), &langRegSubtag, ®ionTag, varKeyValueString); // <1.10><1.17> 1898 1899 1900 // C. Now strip defaults that are implied by other fields. 1901 1902 // 1. If an ISO 3166 region tag matches an ISO 3166 regional language variant subtag, strip the latter. 1903 if ( langRegSubtag && regionTag && strncmp(langRegSubtag+1, regionTag+1, 2) == 0 ) { 1904 _DeleteCharsAtPointer(langRegSubtag, 3); 1905 } 1906 1907 // 2. Strip defaults in input string based on final region tag in locale string 1908 // (mainly for Chinese, to strip -Hans for _CN/_SG, -Hant for _TW/_HK/_MO) 1909 if ( regionTag ) { 1910 testEntry.key = regionTag; 1911 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringRegionToDefaults, kNumLocaleStringRegionToDefaults, 1912 sizeof(KeyStringToResultString), _CompareTestEntryToTableEntryKey ); 1913 if (foundEntry) { 1914 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); 1915 } 1916 } 1917 1918 // 3. Strip defaults in input string based on initial part of locale string 1919 // (mainly to strip default script tag for a language) 1920 testEntry.key = inLocaleString; 1921 foundEntry = (KeyStringToResultString *)bsearch( &testEntry, localeStringPrefixToDefaults, kNumLocaleStringPrefixToDefaults, 1922 sizeof(KeyStringToResultString), _CompareTestEntryPrefixToTableEntryKey ); 1923 if (foundEntry) { 1924 // The input string begins with a character sequence for which 1925 // there are default substrings which should be stripped if present 1926 _RemoveSubstringsIfPresent(inLocaleString, foundEntry->result); 1927 } 1928 } 1929 1930 // D. Re-append any key-value strings, now canonical // <1.10><1.17> 1931 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), varKeyValueString ); 1932 _AppendKeyValueString( inLocaleString, sizeof(inLocaleString), keyValueString ); 1933 1934 // Now create the CFString (even if empty!) 
1935 outStringRef = CFStringCreateWithCString(allocator, inLocaleString, kCFStringEncodingASCII); 1936 } 1937 1938 return outStringRef; 1939 } 1940 1941 // CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes, based on 1942 // the first part of the SPI CFBundleCopyLocalizationForLocalizationInfo in CFBundle_Resources.c 1943 CFStringRef CFLocaleCreateCanonicalLocaleIdentifierFromScriptManagerCodes(CFAllocatorRef allocator, LangCode lcode, RegionCode rcode) { 1944 CFStringRef result = NULL; 1945 if (0 <= rcode && rcode < kNumRegionCodeToLocaleString) { 1946 const char *localeString = regionCodeToLocaleString[rcode]; 1947 if (localeString != NULL && *localeString != '\0') { 1948 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); 1949 } 1950 } 1951 if (result) return result; 1952 if (0 <= lcode && lcode < kNumLangCodeToLocaleString) { 1953 const char *localeString = langCodeToLocaleString[lcode]; 1954 if (localeString != NULL && *localeString != '\0') { 1955 result = CFStringCreateWithCStringNoCopy(allocator, localeString, kCFStringEncodingASCII, kCFAllocatorNull); 1956 } 1957 } 1958 return result; 1959 } 1960 1961 1962 /* 1963 SPI: CFLocaleGetLanguageRegionEncodingForLocaleIdentifier gets the appropriate language and region codes, 1964 and the default legacy script code and encoding, for the specified locale (or language) string. 1965 Returns false if CFLocale has no information about the given locale (in which case none of the by-reference return values are set); 1966 otherwise may set *langCode and/or *regCode to -1 if there is no appropriate legacy value for the locale. 1967 This is a replacement for the CFBundle SPI CFBundleGetLocalizationInfoForLocalization (which was intended to be temporary and transitional); 1968 this function is more up-to-date in its handling of locale strings, and is in CFLocale where this functionality should belong. 
Compared 1969 to CFBundleGetLocalizationInfoForLocalization, this function does not spcially interpret a NULL localeIdentifier to mean use the single most 1970 preferred localization in the current context (this function returns NO for a NULL localeIdentifier); and in this function 1971 langCode, regCode, and scriptCode are all SInt16* (not SInt32* like the equivalent parameters in CFBundleGetLocalizationInfoForLocalization). 1972 */ 1973 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 1974 static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ); 1975 #endif 1976 1977 Boolean CFLocaleGetLanguageRegionEncodingForLocaleIdentifier(CFStringRef localeIdentifier, LangCode *langCode, RegionCode *regCode, ScriptCode *scriptCode, CFStringEncoding *stringEncoding) { 1978 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 1979 Boolean returnValue = false; 1980 CFStringRef canonicalIdentifier = CFLocaleCreateCanonicalLocaleIdentifierFromString(NULL, localeIdentifier); 1981 if (canonicalIdentifier) { 1982 char localeCString[kLocaleIdentifierCStringMax]; 1983 if ( CFStringGetCString(canonicalIdentifier, localeCString, sizeof(localeCString), kCFStringEncodingASCII) ) { 1984 UErrorCode icuStatus = U_ZERO_ERROR; 1985 int32_t languagelength; 1986 char searchString[ULOC_LANG_CAPACITY + ULOC_FULLNAME_CAPACITY]; 1987 1988 languagelength = uloc_getLanguage( localeCString, searchString, ULOC_LANG_CAPACITY, &icuStatus ); 1989 if ( U_SUCCESS(icuStatus) && languagelength > 0 ) { 1990 // OK, here we have at least a language code, check for other components in order 1991 LocaleToLegacyCodes searchEntry = { (const char *)searchString, 0, 0, 0 }; 1992 const LocaleToLegacyCodes * foundEntryPtr; 1993 int32_t componentLength; 1994 char componentString[ULOC_FULLNAME_CAPACITY]; 1995 1996 languagelength = strlen(searchString); // in case it 
got truncated 1997 icuStatus = U_ZERO_ERROR; 1998 componentLength = uloc_getScript( localeCString, componentString, sizeof(componentString), &icuStatus ); 1999 if ( U_FAILURE(icuStatus) || componentLength == 0 ) { 2000 icuStatus = U_ZERO_ERROR; 2001 componentLength = uloc_getCountry( localeCString, componentString, sizeof(componentString), &icuStatus ); 2002 if ( U_FAILURE(icuStatus) || componentLength == 0 ) { 2003 icuStatus = U_ZERO_ERROR; 2004 componentLength = uloc_getVariant( localeCString, componentString, sizeof(componentString), &icuStatus ); 2005 if ( U_FAILURE(icuStatus) ) { 2006 componentLength = 0; 2007 } 2008 } 2009 } 2010 2011 // Append whichever other component we first found 2012 if (componentLength > 0) { 2013 strlcat(searchString, "_", sizeof(searchString)); 2014 strlcat(searchString, componentString, sizeof(searchString)); 2015 } 2016 2017 // Search 2018 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); 2019 if (foundEntryPtr == NULL && (int32_t) strlen(searchString) > languagelength) { 2020 // truncate to language al;one and try again 2021 searchString[languagelength] = 0; 2022 foundEntryPtr = (const LocaleToLegacyCodes *)bsearch( &searchEntry, localeToLegacyCodes, kNumLocaleToLegacyCodes, sizeof(LocaleToLegacyCodes), CompareLocaleToLegacyCodesEntries ); 2023 } 2024 2025 // If found a matching entry, return requested values 2026 if (foundEntryPtr) { 2027 returnValue = true; 2028 if (langCode) *langCode = foundEntryPtr->langCode; 2029 if (regCode) *regCode = foundEntryPtr->regCode; 2030 if (stringEncoding) *stringEncoding = foundEntryPtr->encoding; 2031 if (scriptCode) { 2032 // map CFStringEncoding to ScriptCode 2033 if (foundEntryPtr->encoding < 33/*kCFStringEncodingMacSymbol*/) { 2034 *scriptCode = foundEntryPtr->encoding; 2035 } else { 2036 switch (foundEntryPtr->encoding) { 2037 case 
0x8C/*kCFStringEncodingMacFarsi*/: *scriptCode = 4/*smArabic*/; break; 2038 case 0x98/*kCFStringEncodingMacUkrainian*/: *scriptCode = 7/*smCyrillic*/; break; 2039 case 0xEC/*kCFStringEncodingMacInuit*/: *scriptCode = 28/*smEthiopic*/; break; 2040 case 0xFC/*kCFStringEncodingMacVT100*/: *scriptCode = 32/*smUninterp*/; break; 2041 default: *scriptCode = 0/*smRoman*/; break; 2042 } 2043 } 2044 } 2045 } 2046 } 2047 } 2048 CFRelease(canonicalIdentifier); 2049 } 2050 return returnValue; 2051 #else 2052 return false; 2053 #endif 2054 } 2055 2056 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 2057 static int CompareLocaleToLegacyCodesEntries( const void *entry1, const void *entry2 ) { 2058 const char * localeString1 = ((const LocaleToLegacyCodes *)entry1)->locale; 2059 const char * localeString2 = ((const LocaleToLegacyCodes *)entry2)->locale; 2060 return strcmp(localeString1, localeString2); 2061 } 2062 #endif 2063 2064 CFDictionaryRef CFLocaleCreateComponentsFromLocaleIdentifier(CFAllocatorRef allocator, CFStringRef localeID) { 2065 CFMutableDictionaryRef working = CFDictionaryCreateMutable(allocator, 10, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); 2066 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 2067 char cLocaleID[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2068 char buffer[ULOC_FULLNAME_CAPACITY+ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2069 2070 UErrorCode icuStatus = U_ZERO_ERROR; 2071 int32_t length = 0; 2072 2073 if (!localeID) goto out; 2074 2075 // Extract the C string locale ID, for ICU 2076 CFIndex outBytes = 0; 2077 CFStringGetBytes(localeID, CFRangeMake(0, CFStringGetLength(localeID)), kCFStringEncodingASCII, (UInt8) '?', true, (unsigned char *)cLocaleID, sizeof(cLocaleID)/sizeof(char) - 1, &outBytes); 2078 cLocaleID[outBytes] = '\0'; 2079 2080 // Get the components 2081 length = 
uloc_getLanguage(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); 2082 if (U_SUCCESS(icuStatus) && length > 0) 2083 { 2084 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); 2085 CFDictionaryAddValue(working, kCFLocaleLanguageCodeKey, string); 2086 CFRelease(string); 2087 } 2088 icuStatus = U_ZERO_ERROR; 2089 2090 length = uloc_getScript(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); 2091 if (U_SUCCESS(icuStatus) && length > 0) 2092 { 2093 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); 2094 CFDictionaryAddValue(working, kCFLocaleScriptCodeKey, string); 2095 CFRelease(string); 2096 } 2097 icuStatus = U_ZERO_ERROR; 2098 2099 length = uloc_getCountry(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); 2100 if (U_SUCCESS(icuStatus) && length > 0) 2101 { 2102 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); 2103 CFDictionaryAddValue(working, kCFLocaleCountryCodeKey, string); 2104 CFRelease(string); 2105 } 2106 icuStatus = U_ZERO_ERROR; 2107 2108 length = uloc_getVariant(cLocaleID, buffer, sizeof(buffer)/sizeof(char), &icuStatus); 2109 if (U_SUCCESS(icuStatus) && length > 0) 2110 { 2111 CFStringRef string = CFStringCreateWithBytes(allocator, (UInt8 *)buffer, length, kCFStringEncodingASCII, true); 2112 CFDictionaryAddValue(working, kCFLocaleVariantCodeKey, string); 2113 CFRelease(string); 2114 } 2115 icuStatus = U_ZERO_ERROR; 2116 2117 // Now get the keywords; open an enumerator on them 2118 UEnumeration *iter = uloc_openKeywords(cLocaleID, &icuStatus); 2119 const char *locKey = NULL; 2120 int32_t locKeyLen = 0; 2121 while ((locKey = uenum_next(iter, &locKeyLen, &icuStatus)) && U_SUCCESS(icuStatus)) 2122 { 2123 char locValue[ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2124 2125 // Get the value for this keyword 2126 if (uloc_getKeywordValue(cLocaleID, locKey, 
locValue, sizeof(locValue)/sizeof(char), &icuStatus) > 0 2127 && U_SUCCESS(icuStatus)) 2128 { 2129 CFStringRef key = CFStringCreateWithBytes(allocator, (UInt8 *)locKey, strlen(locKey), kCFStringEncodingASCII, true); 2130 CFStringRef value = CFStringCreateWithBytes(allocator, (UInt8 *)locValue, strlen(locValue), kCFStringEncodingASCII, true); 2131 if (key && value) 2132 CFDictionaryAddValue(working, key, value); 2133 if (key) 2134 CFRelease(key); 2135 if (value) 2136 CFRelease(value); 2137 } 2138 } 2139 uenum_close(iter); 2140 2141 out:; 2142 #endif 2143 // Convert to an immutable dictionary and return 2144 CFDictionaryRef result = CFDictionaryCreateCopy(allocator, working); 2145 CFRelease(working); 2146 return result; 2147 } 2148 2149 static char *__CStringFromString(CFStringRef str) { 2150 if (!str) return NULL; 2151 CFRange rg = CFRangeMake(0, CFStringGetLength(str)); 2152 CFIndex neededLength = 0; 2153 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, NULL, 0, &neededLength); 2154 char *buf = (char *)malloc(neededLength + 1); 2155 CFStringGetBytes(str, rg, kCFStringEncodingASCII, (UInt8)'?', false, (uint8_t *)buf, neededLength, &neededLength); 2156 buf[neededLength] = '\0'; 2157 return buf; 2158 } 2159 2160 CFStringRef CFLocaleCreateLocaleIdentifierFromComponents(CFAllocatorRef allocator, CFDictionaryRef dictionary) { 2161 if (!dictionary) return NULL; 2162 2163 CFIndex cnt = CFDictionaryGetCount(dictionary); 2164 STACK_BUFFER_DECL(CFStringRef, values, cnt); 2165 STACK_BUFFER_DECL(CFStringRef, keys, cnt); 2166 CFDictionaryGetKeysAndValues(dictionary, (const void **)keys, (const void **)values); 2167 2168 char *language = NULL, *script = NULL, *country = NULL, *variant = NULL; 2169 for (CFIndex idx = 0; idx < cnt; idx++) { 2170 if (CFEqual(kCFLocaleLanguageCodeKey, keys[idx])) { 2171 language = __CStringFromString(values[idx]); 2172 keys[idx] = NULL; 2173 } else if (CFEqual(kCFLocaleScriptCodeKey, keys[idx])) { 2174 script = 
__CStringFromString(values[idx]); 2175 keys[idx] = NULL; 2176 } else if (CFEqual(kCFLocaleCountryCodeKey, keys[idx])) { 2177 country = __CStringFromString(values[idx]); 2178 keys[idx] = NULL; 2179 } else if (CFEqual(kCFLocaleVariantCodeKey, keys[idx])) { 2180 variant = __CStringFromString(values[idx]); 2181 keys[idx] = NULL; 2182 } 2183 } 2184 2185 char *buf1 = NULL; // (|L)(|_S)(|_C|_C_V|__V) 2186 asprintf(&buf1, "%s%s%s%s%s%s%s", language ? language : "", script ? "_" : "", script ? script : "", (country || variant ? "_" : ""), country ? country : "", variant ? "_" : "", variant ? variant : ""); 2187 2188 char cLocaleID[2 * ULOC_FULLNAME_CAPACITY + 2 * ULOC_KEYWORD_AND_VALUES_CAPACITY]; 2189 strlcpy(cLocaleID, buf1, sizeof(cLocaleID)); 2190 free(language); 2191 free(script); 2192 free(country); 2193 free(variant); 2194 free(buf1); 2195 2196 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX 2197 for (CFIndex idx = 0; idx < cnt; idx++) { 2198 if (keys[idx]) { 2199 char *key = __CStringFromString(keys[idx]); 2200 char *value; 2201 if (0 == strcmp(key, "kCFLocaleCalendarKey")) { 2202 // For interchangeability convenience, we alternatively allow a 2203 // calendar object to be passed in, with the alternate key, and 2204 // we'll extract the identifier. 2205 CFCalendarRef cal = (CFCalendarRef)values[idx]; 2206 CFStringRef ident = CFCalendarGetIdentifier(cal); 2207 value = __CStringFromString(ident); 2208 char *oldkey = key; 2209 key = strdup("calendar"); 2210 free(oldkey); 2211 } else { 2212 value = __CStringFromString(values[idx]); 2213 } 2214 UErrorCode status = U_ZERO_ERROR; 2215 uloc_setKeywordValue(key, value, cLocaleID, sizeof(cLocaleID), &status); 2216 free(key); 2217 free(value); 2218 } 2219 } 2220 #endif 2221 2222 return CFStringCreateWithCString(allocator, cLocaleID, kCFStringEncodingASCII); 2223 } 2224