IntlObject.cpp
1 /* 2 * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family) 3 * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com) 4 * Copyright (C) 2016-2020 Apple Inc. All rights reserved. 5 * Copyright (C) 2020 Sony Interactive Entertainment Inc. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 18 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29 #include "config.h" 30 #include "IntlObject.h" 31 32 #include "Error.h" 33 #include "FunctionPrototype.h" 34 #include "IntlCollator.h" 35 #include "IntlCollatorConstructor.h" 36 #include "IntlCollatorPrototype.h" 37 #include "IntlDateTimeFormatConstructor.h" 38 #include "IntlDateTimeFormatPrototype.h" 39 #include "IntlDisplayNames.h" 40 #include "IntlDisplayNamesConstructor.h" 41 #include "IntlDisplayNamesPrototype.h" 42 #include "IntlListFormat.h" 43 #include "IntlListFormatConstructor.h" 44 #include "IntlListFormatPrototype.h" 45 #include "IntlLocale.h" 46 #include "IntlLocaleConstructor.h" 47 #include "IntlLocalePrototype.h" 48 #include "IntlNumberFormatConstructor.h" 49 #include "IntlNumberFormatPrototype.h" 50 #include "IntlObjectInlines.h" 51 #include "IntlPluralRulesConstructor.h" 52 #include "IntlPluralRulesPrototype.h" 53 #include "IntlRelativeTimeFormatConstructor.h" 54 #include "IntlRelativeTimeFormatPrototype.h" 55 #include "IntlSegmenter.h" 56 #include "IntlSegmenterConstructor.h" 57 #include "IntlSegmenterPrototype.h" 58 #include "JSCInlines.h" 59 #include "Options.h" 60 #include <unicode/ubrk.h> 61 #include <unicode/ucol.h> 62 #include <unicode/ufieldpositer.h> 63 #include <unicode/uloc.h> 64 #include <unicode/unumsys.h> 65 #include <wtf/Assertions.h> 66 #include <wtf/Language.h> 67 #include <wtf/NeverDestroyed.h> 68 #include <wtf/text/StringBuilder.h> 69 #include <wtf/text/StringImpl.h> 70 #include <wtf/unicode/icu/ICUHelpers.h> 71 72 namespace JSC { 73 74 STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject); 75 76 static JSC_DECLARE_HOST_FUNCTION(intlObjectFuncGetCanonicalLocales); 77 78 static JSValue createCollatorConstructor(VM& vm, JSObject* object) 79 { 80 IntlObject* intlObject = jsCast<IntlObject*>(object); 81 JSGlobalObject* globalObject = intlObject->globalObject(vm); 82 return IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), 
jsCast<IntlCollatorPrototype*>(globalObject->collatorStructure()->storedPrototypeObject())); 83 } 84 85 static JSValue createDateTimeFormatConstructor(VM& vm, JSObject* object) 86 { 87 IntlObject* intlObject = jsCast<IntlObject*>(object); 88 JSGlobalObject* globalObject = intlObject->globalObject(vm); 89 return globalObject->dateTimeFormatConstructor(); 90 } 91 92 static JSValue createDisplayNamesConstructor(VM& vm, JSObject* object) 93 { 94 IntlObject* intlObject = jsCast<IntlObject*>(object); 95 JSGlobalObject* globalObject = intlObject->globalObject(vm); 96 return IntlDisplayNamesConstructor::create(vm, IntlDisplayNamesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDisplayNamesPrototype*>(globalObject->displayNamesStructure()->storedPrototypeObject())); 97 } 98 99 static JSValue createListFormatConstructor(VM& vm, JSObject* object) 100 { 101 IntlObject* intlObject = jsCast<IntlObject*>(object); 102 JSGlobalObject* globalObject = intlObject->globalObject(vm); 103 return IntlListFormatConstructor::create(vm, IntlListFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlListFormatPrototype*>(globalObject->listFormatStructure()->storedPrototypeObject())); 104 } 105 106 static JSValue createLocaleConstructor(VM& vm, JSObject* object) 107 { 108 IntlObject* intlObject = jsCast<IntlObject*>(object); 109 JSGlobalObject* globalObject = intlObject->globalObject(vm); 110 return IntlLocaleConstructor::create(vm, IntlLocaleConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlLocalePrototype*>(globalObject->localeStructure()->storedPrototypeObject())); 111 } 112 113 static JSValue createNumberFormatConstructor(VM& vm, JSObject* object) 114 { 115 IntlObject* intlObject = jsCast<IntlObject*>(object); 116 JSGlobalObject* globalObject = intlObject->globalObject(vm); 117 return globalObject->numberFormatConstructor(); 118 } 119 120 static JSValue 
createPluralRulesConstructor(VM& vm, JSObject* object) 121 { 122 IntlObject* intlObject = jsCast<IntlObject*>(object); 123 JSGlobalObject* globalObject = intlObject->globalObject(vm); 124 return IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlPluralRulesPrototype*>(globalObject->pluralRulesStructure()->storedPrototypeObject())); 125 } 126 127 static JSValue createRelativeTimeFormatConstructor(VM& vm, JSObject* object) 128 { 129 IntlObject* intlObject = jsCast<IntlObject*>(object); 130 JSGlobalObject* globalObject = intlObject->globalObject(vm); 131 return IntlRelativeTimeFormatConstructor::create(vm, IntlRelativeTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlRelativeTimeFormatPrototype*>(globalObject->relativeTimeFormatStructure()->storedPrototypeObject())); 132 } 133 134 static JSValue createSegmenterConstructor(VM& vm, JSObject* object) 135 { 136 IntlObject* intlObject = jsCast<IntlObject*>(object); 137 JSGlobalObject* globalObject = intlObject->globalObject(vm); 138 return IntlSegmenterConstructor::create(vm, IntlSegmenterConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlSegmenterPrototype*>(globalObject->segmenterStructure()->storedPrototypeObject())); 139 } 140 141 } 142 143 #include "IntlObject.lut.h" 144 145 namespace JSC { 146 147 /* Source for IntlObject.lut.h 148 @begin intlObjectTable 149 getCanonicalLocales intlObjectFuncGetCanonicalLocales DontEnum|Function 1 150 Collator createCollatorConstructor DontEnum|PropertyCallback 151 DateTimeFormat createDateTimeFormatConstructor DontEnum|PropertyCallback 152 Locale createLocaleConstructor DontEnum|PropertyCallback 153 NumberFormat createNumberFormatConstructor DontEnum|PropertyCallback 154 PluralRules createPluralRulesConstructor DontEnum|PropertyCallback 155 RelativeTimeFormat 
createRelativeTimeFormatConstructor DontEnum|PropertyCallback 156 Segmenter createSegmenterConstructor DontEnum|PropertyCallback 157 @end 158 */ 159 160 struct MatcherResult { 161 String locale; 162 String extension; 163 size_t extensionIndex { 0 }; 164 }; 165 166 const ClassInfo IntlObject::s_info = { "Intl", &Base::s_info, &intlObjectTable, nullptr, CREATE_METHOD_TABLE(IntlObject) }; 167 168 void UFieldPositionIteratorDeleter::operator()(UFieldPositionIterator* iterator) const 169 { 170 if (iterator) 171 ufieldpositer_close(iterator); 172 } 173 174 IntlObject::IntlObject(VM& vm, Structure* structure) 175 : Base(vm, structure) 176 { 177 } 178 179 IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure) 180 { 181 IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure); 182 object->finishCreation(vm, globalObject); 183 return object; 184 } 185 186 void IntlObject::finishCreation(VM& vm, JSGlobalObject*) 187 { 188 Base::finishCreation(vm); 189 ASSERT(inherits(vm, info())); 190 JSC_TO_STRING_TAG_WITHOUT_TRANSITION(); 191 #if HAVE(ICU_U_LOCALE_DISPLAY_NAMES) 192 putDirectWithoutTransition(vm, vm.propertyNames->DisplayNames, createDisplayNamesConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum)); 193 #else 194 UNUSED_PARAM(&createDisplayNamesConstructor); 195 #endif 196 #if HAVE(ICU_U_LIST_FORMATTER) 197 putDirectWithoutTransition(vm, vm.propertyNames->ListFormat, createListFormatConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum)); 198 #else 199 UNUSED_PARAM(&createListFormatConstructor); 200 #endif 201 } 202 203 Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) 204 { 205 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); 206 } 207 208 static Vector<StringView> unicodeExtensionComponents(StringView extension) 209 { 210 // UnicodeExtensionSubtags (extension) 
211 // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags 212 213 auto extensionLength = extension.length(); 214 if (extensionLength < 3) 215 return { }; 216 217 Vector<StringView> subtags; 218 size_t subtagStart = 3; // Skip initial -u-. 219 size_t valueStart = 3; 220 bool isLeading = true; 221 for (size_t index = subtagStart; index < extensionLength; ++index) { 222 if (extension[index] == '-') { 223 if (index - subtagStart == 2) { 224 // Tag is a key, first append prior key's value if there is one. 225 if (subtagStart - valueStart > 1) 226 subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); 227 subtags.append(extension.substring(subtagStart, index - subtagStart)); 228 valueStart = index + 1; 229 isLeading = false; 230 } else if (isLeading) { 231 // Leading subtags before first key. 232 subtags.append(extension.substring(subtagStart, index - subtagStart)); 233 valueStart = index + 1; 234 } 235 subtagStart = index + 1; 236 } 237 } 238 if (extensionLength - subtagStart == 2) { 239 // Trailing an extension key, first append prior key's value if there is one. 240 if (subtagStart - valueStart > 1) 241 subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1)); 242 valueStart = subtagStart; 243 } 244 // Append final key's value. 245 subtags.append(extension.substring(valueStart, extensionLength - valueStart)); 246 return subtags; 247 } 248 249 Vector<char, 32> localeIDBufferForLanguageTag(const CString& tag) 250 { 251 if (!tag.length()) 252 return { }; 253 254 UErrorCode status = U_ZERO_ERROR; 255 Vector<char, 32> buffer(32); 256 int32_t parsedLength; 257 auto bufferLength = uloc_forLanguageTag(tag.data(), buffer.data(), buffer.size(), &parsedLength, &status); 258 if (needsToGrowToProduceCString(status)) { 259 // Before ICU 64, there's a chance uloc_forLanguageTag will "buffer overflow" while requesting a *smaller* size. 
260 buffer.resize(bufferLength + 1); 261 status = U_ZERO_ERROR; 262 uloc_forLanguageTag(tag.data(), buffer.data(), bufferLength + 1, &parsedLength, &status); 263 } 264 if (U_FAILURE(status) || parsedLength != static_cast<int32_t>(tag.length())) 265 return { }; 266 267 ASSERT(buffer.contains('\0')); 268 return buffer; 269 } 270 271 Vector<char, 32> canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(Vector<char, 32>&& buffer) 272 { 273 StringView locale(buffer.data(), buffer.size()); 274 ASSERT(locale.is8Bit()); 275 size_t extensionIndex = locale.find("-u-"); 276 if (extensionIndex == notFound) 277 return WTFMove(buffer); 278 279 // Since ICU's canonicalization is incomplete, we need to perform some of canonicalization here. 280 size_t extensionLength = locale.length() - extensionIndex; 281 size_t end = extensionIndex + 3; 282 while (end < locale.length()) { 283 end = locale.find('-', end); 284 if (end == notFound) 285 break; 286 // Found another singleton. 287 if (end + 2 < locale.length() && locale[end + 2] == '-') { 288 extensionLength = end - extensionIndex; 289 break; 290 } 291 end++; 292 } 293 294 Vector<char, 32> result; 295 result.append(buffer.data(), extensionIndex + 2); // "-u" is included. 296 StringView extension = locale.substring(extensionIndex, extensionLength); 297 ASSERT(extension.is8Bit()); 298 auto subtags = unicodeExtensionComponents(extension); 299 for (unsigned index = 0; index < subtags.size();) { 300 auto subtag = subtags[index]; 301 ASSERT(subtag.is8Bit()); 302 result.append('-'); 303 result.append(subtag.characters8(), subtag.length()); 304 305 if (subtag.length() != 2) { 306 ++index; 307 continue; 308 } 309 ASSERT(subtag.length() == 2); 310 311 // This is unicode extension key. 
312 unsigned valueIndexStart = index + 1; 313 unsigned valueIndexEnd = valueIndexStart; 314 for (; valueIndexEnd < subtags.size(); ++valueIndexEnd) { 315 if (subtags[valueIndexEnd].length() == 2) 316 break; 317 } 318 // [valueIndexStart, valueIndexEnd) is value of this unicode extension. If there is no value, valueIndexStart == valueIndexEnd. 319 320 for (unsigned valueIndex = valueIndexStart; valueIndex < valueIndexEnd; ++valueIndex) { 321 auto value = subtags[valueIndex]; 322 if (value != "true"_s) { 323 result.append('-'); 324 result.append(value.characters8(), value.length()); 325 } 326 } 327 index = valueIndexEnd; 328 } 329 330 unsigned remainingStart = extensionIndex + extensionLength; 331 unsigned remainingLength = buffer.size() - remainingStart; 332 result.append(buffer.data() + remainingStart, remainingLength); 333 return result; 334 } 335 336 String languageTagForLocaleID(const char* localeID, bool isImmortal) 337 { 338 Vector<char, 32> buffer; 339 auto status = callBufferProducingFunction(uloc_toLanguageTag, localeID, buffer, false); 340 if (U_FAILURE(status)) 341 return String(); 342 343 auto createResult = [&](Vector<char, 32>&& buffer) -> String { 344 // This is used to store into static variables that may be shared across JSC execution threads. 345 // This must be immortal to make concurrent ref/deref safe. 346 if (isImmortal) 347 return StringImpl::createStaticStringImpl(buffer.data(), buffer.size()); 348 return String(buffer.data(), buffer.size()); 349 }; 350 351 return createResult(canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer))); 352 } 353 354 // Ensure we have xx-ZZ whenever we have xx-Yyyy-ZZ. 
355 static void addScriptlessLocaleIfNeeded(HashSet<String>& availableLocales, StringView locale) 356 { 357 if (locale.length() < 10) 358 return; 359 360 Vector<StringView, 3> subtags; 361 for (auto subtag : locale.split('-')) { 362 if (subtags.size() == 3) 363 return; 364 subtags.append(subtag); 365 } 366 367 if (subtags.size() != 3 || subtags[1].length() != 4 || subtags[2].length() > 3) 368 return; 369 370 Vector<char, 12> buffer; 371 ASSERT(subtags[0].is8Bit() && subtags[0].isAllASCII()); 372 buffer.append(reinterpret_cast<const char*>(subtags[0].characters8()), subtags[0].length()); 373 buffer.append('-'); 374 ASSERT(subtags[2].is8Bit() && subtags[2].isAllASCII()); 375 buffer.append(reinterpret_cast<const char*>(subtags[2].characters8()), subtags[2].length()); 376 377 availableLocales.add(StringImpl::createStaticStringImpl(buffer.data(), buffer.size())); 378 } 379 380 const HashSet<String>& intlAvailableLocales() 381 { 382 static LazyNeverDestroyed<HashSet<String>> availableLocales; 383 static std::once_flag initializeOnce; 384 std::call_once(initializeOnce, [&] { 385 availableLocales.construct(); 386 ASSERT(availableLocales->isEmpty()); 387 constexpr bool isImmortal = true; 388 int32_t count = uloc_countAvailable(); 389 for (int32_t i = 0; i < count; ++i) { 390 String locale = languageTagForLocaleID(uloc_getAvailable(i), isImmortal); 391 if (locale.isEmpty()) 392 continue; 393 availableLocales->add(locale); 394 addScriptlessLocaleIfNeeded(availableLocales.get(), locale); 395 } 396 }); 397 return availableLocales; 398 } 399 400 // This table is total ordering indexes for ASCII characters in UCA DUCET. 401 // It is generated from CLDR common/uca/allkeys_DUCET.txt. 402 // 403 // Rough overview of UCA is the followings. 404 // https://unicode.org/reports/tr10/#Main_Algorithm 405 // 406 // 1. Normalize each input string. 407 // 408 // 2. Produce an array of collation elements for each string. 409 // 410 // There are 3 (or 4) levels. 
//    And each character has 4 weights. We concatenate them into one sequence called collation elements.
//    For example, "c" has `[.0706.0020.0002]`. And "ca◌́b" becomes `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`.
//    We need to consider variable weighting (https://unicode.org/reports/tr10/#Variable_Weighting), but if it is Non-ignorable, we can just use
//    the collation elements defined in the table.
//
// 3. Produce a sort key for each string from the arrays of collation elements.
//
//    Generate the sort key from the collation elements. From lower levels to higher levels, we collect weights, but 0000 weights are skipped.
//    Between levels, we insert a 0000 weight to mark the level boundary.
//
//        string: "ca◌́b"
//        collation elements: `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`
//        sort key: `0706 06D9 06EE 0000 0020 0020 0021 0020 0000 0002 0002 0002 0002`
//                                  ^                        ^
//                                  level boundary           level boundary
//
// 4. Compare the two sort keys with a binary comparison operation.
//
// The key observations are the following.
//
// 1. If an input is an ASCII string, UCA step-1 normalization does nothing.
// 2. If an input is an ASCII string, non-starters (https://unicode.org/reports/tr10/#UTS10-D33) do not exist. So no special handling in UCA step-2 is required.
// 3. If an input is an ASCII string, no multiple-character collation elements exist. So no special handling in UCA step-2 is required. For example, "L·" is not ASCII.
// 4. UCA step-3 handles 0000-weighted characters specially, and ASCII contains such characters. But 0000 elements are used only for rare control characters.
//    We can ignore this special handling if ASCII strings do not include control characters.
// 5. Except for the 0000 cases, all characters' level-1 weights are different. And level-2 weights are always 0020, which is lower than any level-1 weight.
436 // This means that binary comparison in UCA step-4 do not need to check level 2~ weights. 437 // 438 // Based on the above observation, our fast path handles ASCII strings excluding control characters. The following weight is recomputed weights from level-1 weights. 439 const uint8_t ducetWeights[128] = { 440 0, 0, 0, 0, 0, 0, 0, 0, 441 0, 1, 2, 3, 4, 5, 0, 0, 442 0, 0, 0, 0, 0, 0, 0, 0, 443 0, 0, 0, 0, 0, 0, 0, 0, 444 6, 12, 16, 28, 38, 29, 27, 15, 445 17, 18, 24, 32, 9, 8, 14, 25, 446 39, 40, 41, 42, 43, 44, 45, 46, 447 47, 48, 11, 10, 33, 34, 35, 13, 448 23, 50, 52, 54, 56, 58, 60, 62, 449 64, 66, 68, 70, 72, 74, 76, 78, 450 80, 82, 84, 86, 88, 90, 92, 94, 451 96, 98, 100, 19, 26, 20, 31, 7, 452 30, 49, 51, 53, 55, 57, 59, 61, 453 63, 65, 67, 69, 71, 73, 75, 77, 454 79, 81, 83, 85, 87, 89, 91, 93, 455 95, 97, 99, 21, 36, 22, 37, 0, 456 }; 457 458 const HashSet<String>& intlCollatorAvailableLocales() 459 { 460 static LazyNeverDestroyed<HashSet<String>> availableLocales; 461 static std::once_flag initializeOnce; 462 std::call_once(initializeOnce, [&] { 463 availableLocales.construct(); 464 ASSERT(availableLocales->isEmpty()); 465 constexpr bool isImmortal = true; 466 int32_t count = ucol_countAvailable(); 467 for (int32_t i = 0; i < count; ++i) { 468 String locale = languageTagForLocaleID(ucol_getAvailable(i), isImmortal); 469 if (locale.isEmpty()) 470 continue; 471 availableLocales->add(locale); 472 addScriptlessLocaleIfNeeded(availableLocales.get(), locale); 473 } 474 IntlCollator::checkICULocaleInvariants(availableLocales.get()); 475 }); 476 return availableLocales; 477 } 478 479 const HashSet<String>& intlSegmenterAvailableLocales() 480 { 481 static NeverDestroyed<HashSet<String>> cachedAvailableLocales; 482 HashSet<String>& availableLocales = cachedAvailableLocales.get(); 483 484 static std::once_flag initializeOnce; 485 std::call_once(initializeOnce, [&] { 486 ASSERT(availableLocales.isEmpty()); 487 constexpr bool isImmortal = true; 488 int32_t count = 
ubrk_countAvailable(); 489 for (int32_t i = 0; i < count; ++i) { 490 String locale = languageTagForLocaleID(ubrk_getAvailable(i), isImmortal); 491 if (locale.isEmpty()) 492 continue; 493 availableLocales.add(locale); 494 addScriptlessLocaleIfNeeded(availableLocales, locale); 495 } 496 }); 497 return availableLocales; 498 } 499 500 // https://tc39.es/ecma402/#sec-getoption 501 TriState intlBooleanOption(JSGlobalObject* globalObject, JSValue options, PropertyName property) 502 { 503 VM& vm = globalObject->vm(); 504 auto scope = DECLARE_THROW_SCOPE(vm); 505 506 if (options.isUndefined()) 507 return TriState::Indeterminate; 508 509 JSObject* opts = options.toObject(globalObject); 510 RETURN_IF_EXCEPTION(scope, TriState::Indeterminate); 511 512 JSValue value = opts->get(globalObject, property); 513 RETURN_IF_EXCEPTION(scope, TriState::Indeterminate); 514 515 if (value.isUndefined()) 516 return TriState::Indeterminate; 517 518 return triState(value.toBoolean(globalObject)); 519 } 520 521 String intlStringOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback) 522 { 523 // GetOption (options, property, type="string", values, fallback) 524 // https://tc39.github.io/ecma402/#sec-getoption 525 526 VM& vm = globalObject->vm(); 527 auto scope = DECLARE_THROW_SCOPE(vm); 528 529 if (options.isUndefined()) 530 return fallback; 531 532 JSObject* opts = options.toObject(globalObject); 533 RETURN_IF_EXCEPTION(scope, String()); 534 535 JSValue value = opts->get(globalObject, property); 536 RETURN_IF_EXCEPTION(scope, String()); 537 538 if (!value.isUndefined()) { 539 String stringValue = value.toWTFString(globalObject); 540 RETURN_IF_EXCEPTION(scope, String()); 541 542 if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) { 543 throwException(globalObject, scope, createRangeError(globalObject, notFound)); 544 return { }; 545 } 546 return 
stringValue; 547 } 548 549 return fallback; 550 } 551 552 unsigned intlNumberOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback) 553 { 554 // GetNumberOption (options, property, minimum, maximum, fallback) 555 // https://tc39.github.io/ecma402/#sec-getnumberoption 556 557 VM& vm = globalObject->vm(); 558 auto scope = DECLARE_THROW_SCOPE(vm); 559 560 if (options.isUndefined()) 561 return fallback; 562 563 JSObject* opts = options.toObject(globalObject); 564 RETURN_IF_EXCEPTION(scope, 0); 565 566 JSValue value = opts->get(globalObject, property); 567 RETURN_IF_EXCEPTION(scope, 0); 568 569 RELEASE_AND_RETURN(scope, intlDefaultNumberOption(globalObject, value, property, minimum, maximum, fallback)); 570 } 571 572 unsigned intlDefaultNumberOption(JSGlobalObject* globalObject, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback) 573 { 574 // DefaultNumberOption (value, minimum, maximum, fallback) 575 // https://tc39.github.io/ecma402/#sec-defaultnumberoption 576 577 VM& vm = globalObject->vm(); 578 auto scope = DECLARE_THROW_SCOPE(vm); 579 580 if (!value.isUndefined()) { 581 double doubleValue = value.toNumber(globalObject); 582 RETURN_IF_EXCEPTION(scope, 0); 583 584 if (!(doubleValue >= minimum && doubleValue <= maximum)) { 585 throwException(globalObject, scope, createRangeError(globalObject, *property.publicName() + " is out of range")); 586 return 0; 587 } 588 return static_cast<unsigned>(doubleValue); 589 } 590 return fallback; 591 } 592 593 // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier 594 bool isUnicodeLocaleIdentifierType(StringView string) 595 { 596 ASSERT(!string.isNull()); 597 598 for (auto part : string.splitAllowingEmptyEntries('-')) { 599 auto length = part.length(); 600 if (length < 3 || length > 8) 601 return false; 602 603 for (auto character : part.codeUnits()) { 604 if (!isASCIIAlphanumeric(character)) 605 return 
false; 606 } 607 } 608 609 return true; 610 } 611 612 // https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid 613 static String canonicalizeLanguageTag(const CString& tag) 614 { 615 auto buffer = localeIDBufferForLanguageTag(tag); 616 if (buffer.isEmpty()) 617 return String(); 618 619 return languageTagForLocaleID(buffer.data()); 620 } 621 622 Vector<String> canonicalizeLocaleList(JSGlobalObject* globalObject, JSValue locales) 623 { 624 // CanonicalizeLocaleList (locales) 625 // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist 626 627 VM& vm = globalObject->vm(); 628 auto scope = DECLARE_THROW_SCOPE(vm); 629 630 Vector<String> seen; 631 632 if (locales.isUndefined()) 633 return seen; 634 635 JSObject* localesObject; 636 if (locales.isString() || locales.inherits<IntlLocale>(vm)) { 637 JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous)); 638 if (!localesArray) { 639 throwOutOfMemoryError(globalObject, scope); 640 return { }; 641 } 642 localesArray->push(globalObject, locales); 643 RETURN_IF_EXCEPTION(scope, Vector<String>()); 644 645 localesObject = localesArray; 646 } else { 647 localesObject = locales.toObject(globalObject); 648 RETURN_IF_EXCEPTION(scope, Vector<String>()); 649 } 650 651 // 6. Let len be ToLength(Get(O, "length")). 
652 JSValue lengthProperty = localesObject->get(globalObject, vm.propertyNames->length); 653 RETURN_IF_EXCEPTION(scope, Vector<String>()); 654 655 uint64_t length = static_cast<uint64_t>(lengthProperty.toLength(globalObject)); 656 RETURN_IF_EXCEPTION(scope, Vector<String>()); 657 658 HashSet<String> seenSet; 659 for (uint64_t k = 0; k < length; ++k) { 660 bool kPresent = localesObject->hasProperty(globalObject, k); 661 RETURN_IF_EXCEPTION(scope, Vector<String>()); 662 663 if (kPresent) { 664 JSValue kValue = localesObject->get(globalObject, k); 665 RETURN_IF_EXCEPTION(scope, Vector<String>()); 666 667 if (!kValue.isString() && !kValue.isObject()) { 668 throwTypeError(globalObject, scope, "locale value must be a string or object"_s); 669 return { }; 670 } 671 672 String tag; 673 if (kValue.inherits<IntlLocale>(vm)) 674 tag = jsCast<IntlLocale*>(kValue)->toString(); 675 else { 676 JSString* string = kValue.toString(globalObject); 677 RETURN_IF_EXCEPTION(scope, Vector<String>()); 678 679 tag = string->value(globalObject); 680 RETURN_IF_EXCEPTION(scope, Vector<String>()); 681 } 682 683 if (isStructurallyValidLanguageTag(tag)) { 684 ASSERT(tag.isAllASCII()); 685 String canonicalizedTag = canonicalizeLanguageTag(tag.ascii()); 686 if (!canonicalizedTag.isNull()) { 687 if (seenSet.add(canonicalizedTag).isNewEntry) 688 seen.append(canonicalizedTag); 689 continue; 690 } 691 } 692 693 String errorMessage = tryMakeString("invalid language tag: ", tag); 694 if (UNLIKELY(!errorMessage)) { 695 throwException(globalObject, scope, createOutOfMemoryError(globalObject)); 696 return { }; 697 } 698 throwException(globalObject, scope, createRangeError(globalObject, errorMessage)); 699 return { }; 700 } 701 } 702 703 return seen; 704 } 705 706 String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale) 707 { 708 return bestAvailableLocale(locale, [&](const String& candidate) { 709 return availableLocales.contains(candidate); 710 }); 711 } 712 713 String 
defaultLocale(JSGlobalObject* globalObject) 714 { 715 // DefaultLocale () 716 // https://tc39.github.io/ecma402/#sec-defaultlocale 717 718 // WebCore's global objects will have their own ideas of how to determine the language. It may 719 // be determined by WebCore-specific logic like some WK settings. Usually this will return the 720 // same thing as userPreferredLanguages()[0]. 721 if (auto defaultLanguage = globalObject->globalObjectMethodTable()->defaultLanguage) { 722 String locale = canonicalizeLanguageTag(defaultLanguage().utf8()); 723 if (!locale.isEmpty()) 724 return locale; 725 } 726 727 Vector<String> languages = userPreferredLanguages(); 728 for (const auto& language : languages) { 729 String locale = canonicalizeLanguageTag(language.utf8()); 730 if (!locale.isEmpty()) 731 return locale; 732 } 733 734 // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user 735 // has configured some other language, but being wrong is better than crashing. 736 static LazyNeverDestroyed<String> icuDefaultLocalString; 737 static std::once_flag initializeOnce; 738 std::call_once(initializeOnce, [&] { 739 constexpr bool isImmortal = true; 740 icuDefaultLocalString.construct(languageTagForLocaleID(uloc_getDefault(), isImmortal)); 741 }); 742 if (!icuDefaultLocalString->isEmpty()) 743 return icuDefaultLocalString.get(); 744 745 return "en"_s; 746 } 747 748 String removeUnicodeLocaleExtension(const String& locale) 749 { 750 Vector<String> parts = locale.split('-'); 751 StringBuilder builder; 752 size_t partsSize = parts.size(); 753 bool atPrivate = false; 754 if (partsSize > 0) 755 builder.append(parts[0]); 756 for (size_t p = 1; p < partsSize; ++p) { 757 if (parts[p] == "x") 758 atPrivate = true; 759 if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) { 760 // Skip the u- and anything that follows until another singleton. 761 // While the next part is part of the unicode extension, skip it. 
762 while (p + 1 < partsSize && parts[p + 1].length() > 1) 763 ++p; 764 } else { 765 builder.append('-', parts[p]); 766 } 767 } 768 return builder.toString(); 769 } 770 771 static MatcherResult lookupMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) 772 { 773 // LookupMatcher (availableLocales, requestedLocales) 774 // https://tc39.github.io/ecma402/#sec-lookupmatcher 775 776 String locale; 777 String noExtensionsLocale; 778 String availableLocale; 779 for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) { 780 locale = requestedLocales[i]; 781 noExtensionsLocale = removeUnicodeLocaleExtension(locale); 782 availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale); 783 } 784 785 MatcherResult result; 786 if (!availableLocale.isEmpty()) { 787 result.locale = availableLocale; 788 if (locale != noExtensionsLocale) { 789 size_t extensionIndex = locale.find("-u-"); 790 RELEASE_ASSERT(extensionIndex != notFound); 791 792 size_t extensionLength = locale.length() - extensionIndex; 793 size_t end = extensionIndex + 3; 794 while (end < locale.length()) { 795 end = locale.find('-', end); 796 if (end == notFound) 797 break; 798 if (end + 2 < locale.length() && locale[end + 2] == '-') { 799 extensionLength = end - extensionIndex; 800 break; 801 } 802 end++; 803 } 804 result.extension = locale.substring(extensionIndex, extensionLength); 805 result.extensionIndex = extensionIndex; 806 } 807 } else 808 result.locale = defaultLocale(globalObject); 809 return result; 810 } 811 812 static MatcherResult bestFitMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) 813 { 814 // BestFitMatcher (availableLocales, requestedLocales) 815 // https://tc39.github.io/ecma402/#sec-bestfitmatcher 816 817 // FIXME: Implement something better than lookup. 
818 return lookupMatcher(globalObject, availableLocales, requestedLocales); 819 } 820 821 constexpr ASCIILiteral relevantExtensionKeyString(RelevantExtensionKey key) 822 { 823 switch (key) { 824 #define JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS(lowerName, capitalizedName) \ 825 case RelevantExtensionKey::capitalizedName: \ 826 return #lowerName ""_s; 827 JSC_INTL_RELEVANT_EXTENSION_KEYS(JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS) 828 #undef JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS 829 } 830 return ASCIILiteral::null(); 831 } 832 833 ResolvedLocale resolveLocale(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, LocaleMatcher localeMatcher, const ResolveLocaleOptions& options, std::initializer_list<RelevantExtensionKey> relevantExtensionKeys, Vector<String> (*localeData)(const String&, RelevantExtensionKey)) 834 { 835 // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData) 836 // https://tc39.github.io/ecma402/#sec-resolvelocale 837 838 MatcherResult matcherResult = localeMatcher == LocaleMatcher::Lookup 839 ? 
lookupMatcher(globalObject, availableLocales, requestedLocales) 840 : bestFitMatcher(globalObject, availableLocales, requestedLocales); 841 842 String foundLocale = matcherResult.locale; 843 844 Vector<StringView> extensionSubtags; 845 if (!matcherResult.extension.isNull()) 846 extensionSubtags = unicodeExtensionComponents(matcherResult.extension); 847 848 ResolvedLocale resolved; 849 resolved.dataLocale = foundLocale; 850 851 String supportedExtension = "-u"_s; 852 for (RelevantExtensionKey key : relevantExtensionKeys) { 853 ASCIILiteral keyString = relevantExtensionKeyString(key); 854 Vector<String> keyLocaleData = localeData(foundLocale, key); 855 ASSERT(!keyLocaleData.isEmpty()); 856 857 String value = keyLocaleData[0]; 858 String supportedExtensionAddition; 859 860 if (!extensionSubtags.isEmpty()) { 861 size_t keyPos = extensionSubtags.find(keyString); 862 if (keyPos != notFound) { 863 if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) { 864 StringView requestedValue = extensionSubtags[keyPos + 1]; 865 auto dataPos = keyLocaleData.find(requestedValue); 866 if (dataPos != notFound) { 867 value = keyLocaleData[dataPos]; 868 supportedExtensionAddition = makeString('-', keyString, '-', value); 869 } 870 } else if (keyLocaleData.contains("true"_s)) { 871 value = "true"_s; 872 supportedExtensionAddition = makeString('-', keyString); 873 } 874 } 875 } 876 877 if (auto optionsValue = options[static_cast<unsigned>(key)]) { 878 // Undefined should not get added to the options, it won't displace the extension. 879 // Null will remove the extension. 
880 if ((optionsValue->isNull() || keyLocaleData.contains(*optionsValue)) && *optionsValue != value) { 881 value = optionsValue.value(); 882 supportedExtensionAddition = String(); 883 } 884 } 885 resolved.extensions[static_cast<unsigned>(key)] = value; 886 supportedExtension.append(supportedExtensionAddition); 887 } 888 889 if (supportedExtension.length() > 2) { 890 StringView foundLocaleView(foundLocale); 891 foundLocale = makeString(foundLocaleView.substring(0, matcherResult.extensionIndex), supportedExtension, foundLocaleView.substring(matcherResult.extensionIndex)); 892 } 893 894 resolved.locale = WTFMove(foundLocale); 895 return resolved; 896 } 897 898 static JSArray* lookupSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) 899 { 900 // LookupSupportedLocales (availableLocales, requestedLocales) 901 // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales 902 903 VM& vm = globalObject->vm(); 904 auto scope = DECLARE_THROW_SCOPE(vm); 905 906 size_t len = requestedLocales.size(); 907 JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0); 908 if (!subset) { 909 throwOutOfMemoryError(globalObject, scope); 910 return nullptr; 911 } 912 913 unsigned index = 0; 914 for (size_t k = 0; k < len; ++k) { 915 const String& locale = requestedLocales[k]; 916 String noExtensionsLocale = removeUnicodeLocaleExtension(locale); 917 String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale); 918 if (!availableLocale.isNull()) { 919 subset->putDirectIndex(globalObject, index++, jsString(vm, locale)); 920 RETURN_IF_EXCEPTION(scope, nullptr); 921 } 922 } 923 924 return subset; 925 } 926 927 static JSArray* bestFitSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales) 928 { 929 // BestFitSupportedLocales (availableLocales, 
requestedLocales) 930 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales 931 932 // FIXME: Implement something better than lookup. 933 return lookupSupportedLocales(globalObject, availableLocales, requestedLocales); 934 } 935 936 JSValue supportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options) 937 { 938 // SupportedLocales (availableLocales, requestedLocales, options) 939 // https://tc39.github.io/ecma402/#sec-supportedlocales 940 941 VM& vm = globalObject->vm(); 942 auto scope = DECLARE_THROW_SCOPE(vm); 943 String matcher; 944 945 LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit); 946 RETURN_IF_EXCEPTION(scope, JSValue()); 947 948 if (localeMatcher == LocaleMatcher::BestFit) 949 RELEASE_AND_RETURN(scope, bestFitSupportedLocales(globalObject, availableLocales, requestedLocales)); 950 RELEASE_AND_RETURN(scope, lookupSupportedLocales(globalObject, availableLocales, requestedLocales)); 951 } 952 953 Vector<String> numberingSystemsForLocale(const String& locale) 954 { 955 static LazyNeverDestroyed<Vector<String>> availableNumberingSystems; 956 static std::once_flag initializeOnce; 957 std::call_once(initializeOnce, [&] { 958 availableNumberingSystems.construct(); 959 ASSERT(availableNumberingSystems->isEmpty()); 960 UErrorCode status = U_ZERO_ERROR; 961 UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status); 962 ASSERT(U_SUCCESS(status)); 963 964 int32_t resultLength; 965 // Numbering system names are always ASCII, so use char[]. 
966 while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) { 967 ASSERT(U_SUCCESS(status)); 968 auto numsys = unumsys_openByName(result, &status); 969 ASSERT(U_SUCCESS(status)); 970 // Only support algorithmic if it is the default fot the locale, handled below. 971 if (!unumsys_isAlgorithmic(numsys)) 972 availableNumberingSystems->append(String(StringImpl::createStaticStringImpl(result, resultLength))); 973 unumsys_close(numsys); 974 } 975 uenum_close(numberingSystemNames); 976 }); 977 978 UErrorCode status = U_ZERO_ERROR; 979 UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status); 980 ASSERT(U_SUCCESS(status)); 981 String defaultSystemName(unumsys_getName(defaultSystem)); 982 unumsys_close(defaultSystem); 983 984 Vector<String> numberingSystems({ defaultSystemName }); 985 numberingSystems.appendVector(availableNumberingSystems.get()); 986 return numberingSystems; 987 } 988 989 // unicode_language_subtag = alpha{2,3} | alpha{5,8} ; 990 bool isUnicodeLanguageSubtag(StringView string) 991 { 992 auto length = string.length(); 993 return length >= 2 && length <= 8 && length != 4 && string.isAllSpecialCharacters<isASCIIAlpha>(); 994 } 995 996 // unicode_script_subtag = alpha{4} ; 997 bool isUnicodeScriptSubtag(StringView string) 998 { 999 return string.length() == 4 && string.isAllSpecialCharacters<isASCIIAlpha>(); 1000 } 1001 1002 // unicode_region_subtag = alpha{2} | digit{3} ; 1003 bool isUnicodeRegionSubtag(StringView string) 1004 { 1005 auto length = string.length(); 1006 return (length == 2 && string.isAllSpecialCharacters<isASCIIAlpha>()) 1007 || (length == 3 && string.isAllSpecialCharacters<isASCIIDigit>()); 1008 } 1009 1010 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ; 1011 bool isUnicodeVariantSubtag(StringView string) 1012 { 1013 auto length = string.length(); 1014 if (length >= 5 && length <= 8) 1015 return string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1016 return length == 4 
&& isASCIIDigit(string[0]) && string.substring(1).isAllSpecialCharacters<isASCIIAlphanumeric>(); 1017 } 1018 1019 using VariantCode = uint64_t; 1020 static VariantCode parseVariantCode(StringView string) 1021 { 1022 ASSERT(isUnicodeVariantSubtag(string)); 1023 ASSERT(string.isAllASCII()); 1024 ASSERT(string.length() <= 8); 1025 ASSERT(string.length() >= 1); 1026 struct Code { 1027 LChar characters[8] { }; 1028 }; 1029 static_assert(std::is_unsigned_v<LChar>); 1030 static_assert(sizeof(VariantCode) == sizeof(Code)); 1031 Code code { }; 1032 for (unsigned index = 0; index < string.length(); ++index) 1033 code.characters[index] = toASCIILower(string[index]); 1034 VariantCode result = bitwise_cast<VariantCode>(code); 1035 ASSERT(result); // Not possible since some characters exist. 1036 ASSERT(result != static_cast<VariantCode>(-1)); // Not possible since all characters are ASCII (not Latin-1). 1037 return result; 1038 } 1039 1040 static unsigned convertToUnicodeSingletonIndex(UChar singleton) 1041 { 1042 ASSERT(isASCIIAlphanumeric(singleton)); 1043 singleton = toASCIILower(singleton); 1044 // 0 - 9 => numeric 1045 // 10 - 35 => alpha 1046 if (isASCIIDigit(singleton)) 1047 return singleton - '0'; 1048 return (singleton - 'a') + 10; 1049 } 1050 static constexpr unsigned numberOfUnicodeSingletons = 10 + 26; // Digits + Alphabets. 
1051 1052 static bool isUnicodeExtensionAttribute(StringView string) 1053 { 1054 auto length = string.length(); 1055 return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1056 } 1057 1058 static bool isUnicodeExtensionKey(StringView string) 1059 { 1060 return string.length() == 2 && isASCIIAlphanumeric(string[0]) && isASCIIAlpha(string[1]); 1061 } 1062 1063 static bool isUnicodeExtensionTypeComponent(StringView string) 1064 { 1065 auto length = string.length(); 1066 return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1067 } 1068 1069 static bool isUnicodePUExtensionValue(StringView string) 1070 { 1071 auto length = string.length(); 1072 return length >= 1 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1073 } 1074 1075 static bool isUnicodeOtherExtensionValue(StringView string) 1076 { 1077 auto length = string.length(); 1078 return length >= 2 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1079 } 1080 1081 static bool isUnicodeTKey(StringView string) 1082 { 1083 return string.length() == 2 && isASCIIAlpha(string[0]) && isASCIIDigit(string[1]); 1084 } 1085 1086 static bool isUnicodeTValueComponent(StringView string) 1087 { 1088 auto length = string.length(); 1089 return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>(); 1090 } 1091 1092 // The IsStructurallyValidLanguageTag abstract operation verifies that the locale argument (which must be a String value) 1093 // 1094 // represents a well-formed "Unicode BCP 47 locale identifier" as specified in Unicode Technical Standard 35 section 3.2, 1095 // does not include duplicate variant subtags, and 1096 // does not include duplicate singleton subtags. 
1097 // 1098 // The abstract operation returns true if locale can be generated from the EBNF grammar in section 3.2 of the Unicode Technical Standard 35, 1099 // starting with unicode_locale_id, and does not contain duplicate variant or singleton subtags (other than as a private use subtag). 1100 // It returns false otherwise. Terminal value characters in the grammar are interpreted as the Unicode equivalents of the ASCII octet values given. 1101 // 1102 // https://unicode.org/reports/tr35/#Unicode_locale_identifier 1103 class LanguageTagParser { 1104 public: 1105 LanguageTagParser(StringView tag) 1106 : m_range(tag.splitAllowingEmptyEntries('-')) 1107 , m_cursor(m_range.begin()) 1108 { 1109 ASSERT(m_cursor != m_range.end()); 1110 m_current = *m_cursor; 1111 } 1112 1113 bool parseUnicodeLocaleId(); 1114 bool parseUnicodeLanguageId(); 1115 1116 bool isEOS() 1117 { 1118 return m_cursor == m_range.end(); 1119 } 1120 1121 bool next() 1122 { 1123 if (isEOS()) 1124 return false; 1125 1126 ++m_cursor; 1127 if (isEOS()) { 1128 m_current = StringView(); 1129 return false; 1130 } 1131 m_current = *m_cursor; 1132 return true; 1133 } 1134 1135 private: 1136 bool parseExtensionsAndPUExtensions(); 1137 1138 bool parseUnicodeExtensionAfterPrefix(); 1139 bool parseTransformedExtensionAfterPrefix(); 1140 bool parseOtherExtensionAfterPrefix(); 1141 bool parsePUExtensionAfterPrefix(); 1142 1143 StringView::SplitResult m_range; 1144 StringView::SplitResult::Iterator m_cursor; 1145 StringView m_current; 1146 }; 1147 1148 bool LanguageTagParser::parseUnicodeLocaleId() 1149 { 1150 // unicode_locale_id = unicode_language_id 1151 // extensions* 1152 // pu_extensions? 
; 1153 ASSERT(!isEOS()); 1154 if (!parseUnicodeLanguageId()) 1155 return false; 1156 if (isEOS()) 1157 return true; 1158 if (!parseExtensionsAndPUExtensions()) 1159 return false; 1160 return true; 1161 } 1162 1163 bool LanguageTagParser::parseUnicodeLanguageId() 1164 { 1165 // unicode_language_id = unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ; 1166 ASSERT(!isEOS()); 1167 if (!isUnicodeLanguageSubtag(m_current)) 1168 return false; 1169 if (!next()) 1170 return true; 1171 1172 if (isUnicodeScriptSubtag(m_current)) { 1173 if (!next()) 1174 return true; 1175 } 1176 1177 if (isUnicodeRegionSubtag(m_current)) { 1178 if (!next()) 1179 return true; 1180 } 1181 1182 HashSet<VariantCode> variantCodes; 1183 while (true) { 1184 if (!isUnicodeVariantSubtag(m_current)) 1185 return true; 1186 // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag 1187 // does not include duplicate variant subtags 1188 if (!variantCodes.add(parseVariantCode(m_current)).isNewEntry) 1189 return false; 1190 if (!next()) 1191 return true; 1192 } 1193 } 1194 1195 bool LanguageTagParser::parseUnicodeExtensionAfterPrefix() 1196 { 1197 // ((sep keyword)+ | (sep attribute)+ (sep keyword)*) ; 1198 // 1199 // keyword = key (sep type)? 
; 1200 // key = alphanum alpha ; 1201 // type = alphanum{3,8} (sep alphanum{3,8})* ; 1202 // attribute = alphanum{3,8} ; 1203 ASSERT(!isEOS()); 1204 bool isAttributeOrKeyword = false; 1205 if (isUnicodeExtensionAttribute(m_current)) { 1206 isAttributeOrKeyword = true; 1207 while (true) { 1208 if (!isUnicodeExtensionAttribute(m_current)) 1209 break; 1210 if (!next()) 1211 return true; 1212 } 1213 } 1214 1215 if (isUnicodeExtensionKey(m_current)) { 1216 isAttributeOrKeyword = true; 1217 while (true) { 1218 if (!isUnicodeExtensionKey(m_current)) 1219 break; 1220 if (!next()) 1221 return true; 1222 while (true) { 1223 if (!isUnicodeExtensionTypeComponent(m_current)) 1224 break; 1225 if (!next()) 1226 return true; 1227 } 1228 } 1229 } 1230 1231 if (!isAttributeOrKeyword) 1232 return false; 1233 return true; 1234 } 1235 1236 bool LanguageTagParser::parseTransformedExtensionAfterPrefix() 1237 { 1238 // ((sep tlang (sep tfield)*) | (sep tfield)+) ; 1239 // 1240 // tlang = unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? 
(sep unicode_variant_subtag)* ; 1241 // tfield = tkey tvalue; 1242 // tkey = alpha digit ; 1243 // tvalue = (sep alphanum{3,8})+ ; 1244 ASSERT(!isEOS()); 1245 bool found = false; 1246 if (isUnicodeLanguageSubtag(m_current)) { 1247 found = true; 1248 if (!parseUnicodeLanguageId()) 1249 return false; 1250 if (isEOS()) 1251 return true; 1252 } 1253 1254 if (isUnicodeTKey(m_current)) { 1255 found = true; 1256 while (true) { 1257 if (!isUnicodeTKey(m_current)) 1258 break; 1259 if (!next()) 1260 return false; 1261 if (!isUnicodeTValueComponent(m_current)) 1262 return false; 1263 if (!next()) 1264 return true; 1265 while (true) { 1266 if (!isUnicodeTValueComponent(m_current)) 1267 break; 1268 if (!next()) 1269 return true; 1270 } 1271 } 1272 } 1273 1274 return found; 1275 } 1276 1277 bool LanguageTagParser::parseOtherExtensionAfterPrefix() 1278 { 1279 // (sep alphanum{2,8})+ ; 1280 ASSERT(!isEOS()); 1281 if (!isUnicodeOtherExtensionValue(m_current)) 1282 return false; 1283 if (!next()) 1284 return true; 1285 1286 while (true) { 1287 if (!isUnicodeOtherExtensionValue(m_current)) 1288 return true; 1289 if (!next()) 1290 return true; 1291 } 1292 } 1293 1294 bool LanguageTagParser::parsePUExtensionAfterPrefix() 1295 { 1296 // (sep alphanum{1,8})+ ; 1297 ASSERT(!isEOS()); 1298 if (!isUnicodePUExtensionValue(m_current)) 1299 return false; 1300 if (!next()) 1301 return true; 1302 1303 while (true) { 1304 if (!isUnicodePUExtensionValue(m_current)) 1305 return true; 1306 if (!next()) 1307 return true; 1308 } 1309 } 1310 1311 bool LanguageTagParser::parseExtensionsAndPUExtensions() 1312 { 1313 // unicode_locale_id = unicode_language_id 1314 // extensions* 1315 // pu_extensions? 
; 1316 // 1317 // extensions = unicode_locale_extensions 1318 // | transformed_extensions 1319 // | other_extensions ; 1320 // 1321 // pu_extensions = sep [xX] (sep alphanum{1,8})+ ; 1322 ASSERT(!isEOS()); 1323 Bitmap<numberOfUnicodeSingletons> singletonsSet { }; 1324 while (true) { 1325 if (m_current.length() != 1) 1326 return true; 1327 UChar prefixCode = m_current[0]; 1328 if (!isASCIIAlphanumeric(prefixCode)) 1329 return true; 1330 1331 // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag 1332 // does not include duplicate singleton subtags. 1333 // 1334 // https://unicode.org/reports/tr35/#Unicode_locale_identifier 1335 // As is often the case, the complete syntactic constraints are not easily captured by ABNF, 1336 // so there is a further condition: There cannot be more than one extension with the same singleton (-a-, …, -t-, -u-, …). 1337 // Note that the private use extension (-x-) must come after all other extensions. 1338 if (singletonsSet.get(convertToUnicodeSingletonIndex(prefixCode))) 1339 return false; 1340 singletonsSet.set(convertToUnicodeSingletonIndex(prefixCode), true); 1341 1342 switch (prefixCode) { 1343 case 'u': 1344 case 'U': { 1345 // unicode_locale_extensions = sep [uU] ((sep keyword)+ | (sep attribute)+ (sep keyword)*) ; 1346 if (!next()) 1347 return false; 1348 if (!parseUnicodeExtensionAfterPrefix()) 1349 return false; 1350 if (isEOS()) 1351 return true; 1352 break; // Next extension. 1353 } 1354 case 't': 1355 case 'T': { 1356 // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) | (sep tfield)+) ; 1357 if (!next()) 1358 return false; 1359 if (!parseTransformedExtensionAfterPrefix()) 1360 return false; 1361 if (isEOS()) 1362 return true; 1363 break; // Next extension. 
1364 } 1365 case 'x': 1366 case 'X': { 1367 // pu_extensions = sep [xX] (sep alphanum{1,8})+ ; 1368 if (!next()) 1369 return false; 1370 if (!parsePUExtensionAfterPrefix()) 1371 return false; 1372 return true; // If pu_extensions appear, no extensions can follow after that. This must be the end of unicode_locale_id. 1373 } 1374 default: { 1375 // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ; 1376 if (!next()) 1377 return false; 1378 if (!parseOtherExtensionAfterPrefix()) 1379 return false; 1380 if (isEOS()) 1381 return true; 1382 break; // Next extension. 1383 } 1384 } 1385 } 1386 } 1387 1388 // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag 1389 bool isStructurallyValidLanguageTag(StringView string) 1390 { 1391 LanguageTagParser parser(string); 1392 if (!parser.parseUnicodeLocaleId()) 1393 return false; 1394 if (!parser.isEOS()) 1395 return false; 1396 return true; 1397 } 1398 1399 // unicode_language_id, but intersection of BCP47 and UTS35. 1400 // unicode_language_id = 1401 // | unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? 
(sep unicode_variant_subtag)* ; 1402 // https://github.com/tc39/proposal-intl-displaynames/issues/79 1403 bool isUnicodeLanguageId(StringView string) 1404 { 1405 LanguageTagParser parser(string); 1406 if (!parser.parseUnicodeLanguageId()) 1407 return false; 1408 if (!parser.isEOS()) 1409 return false; 1410 return true; 1411 } 1412 1413 bool isWellFormedCurrencyCode(StringView currency) 1414 { 1415 return currency.length() == 3 && currency.isAllSpecialCharacters<isASCIIAlpha>(); 1416 } 1417 1418 JSC_DEFINE_HOST_FUNCTION(intlObjectFuncGetCanonicalLocales, (JSGlobalObject* globalObject, CallFrame* callFrame)) 1419 { 1420 // Intl.getCanonicalLocales(locales) 1421 // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales 1422 1423 VM& vm = globalObject->vm(); 1424 auto scope = DECLARE_THROW_SCOPE(vm); 1425 1426 Vector<String> localeList = canonicalizeLocaleList(globalObject, callFrame->argument(0)); 1427 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 1428 auto length = localeList.size(); 1429 1430 JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length); 1431 if (!localeArray) { 1432 throwOutOfMemoryError(globalObject, scope); 1433 return encodedJSValue(); 1434 } 1435 1436 for (size_t i = 0; i < length; ++i) { 1437 localeArray->putDirectIndex(globalObject, i, jsString(vm, localeList[i])); 1438 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 1439 } 1440 return JSValue::encode(localeArray); 1441 } 1442 1443 } // namespace JSC