/ runtime / IntlObject.cpp
IntlObject.cpp
   1  /*
   2   * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
   3   * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
   4   * Copyright (C) 2016-2020 Apple Inc. All rights reserved.
   5   * Copyright (C) 2020 Sony Interactive Entertainment Inc.
   6   *
   7   * Redistribution and use in source and binary forms, with or without
   8   * modification, are permitted provided that the following conditions
   9   * are met:
  10   * 1. Redistributions of source code must retain the above copyright
  11   *    notice, this list of conditions and the following disclaimer.
  12   * 2. Redistributions in binary form must reproduce the above copyright
  13   *    notice, this list of conditions and the following disclaimer in the
  14   *    documentation and/or other materials provided with the distribution.
  15   *
  16   * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  17   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  18   * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  19   * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  20   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  26   * THE POSSIBILITY OF SUCH DAMAGE.
  27   */
  28  
  29  #include "config.h"
  30  #include "IntlObject.h"
  31  
  32  #include "Error.h"
  33  #include "FunctionPrototype.h"
  34  #include "IntlCollator.h"
  35  #include "IntlCollatorConstructor.h"
  36  #include "IntlCollatorPrototype.h"
  37  #include "IntlDateTimeFormatConstructor.h"
  38  #include "IntlDateTimeFormatPrototype.h"
  39  #include "IntlDisplayNames.h"
  40  #include "IntlDisplayNamesConstructor.h"
  41  #include "IntlDisplayNamesPrototype.h"
  42  #include "IntlListFormat.h"
  43  #include "IntlListFormatConstructor.h"
  44  #include "IntlListFormatPrototype.h"
  45  #include "IntlLocale.h"
  46  #include "IntlLocaleConstructor.h"
  47  #include "IntlLocalePrototype.h"
  48  #include "IntlNumberFormatConstructor.h"
  49  #include "IntlNumberFormatPrototype.h"
  50  #include "IntlObjectInlines.h"
  51  #include "IntlPluralRulesConstructor.h"
  52  #include "IntlPluralRulesPrototype.h"
  53  #include "IntlRelativeTimeFormatConstructor.h"
  54  #include "IntlRelativeTimeFormatPrototype.h"
  55  #include "IntlSegmenter.h"
  56  #include "IntlSegmenterConstructor.h"
  57  #include "IntlSegmenterPrototype.h"
  58  #include "JSCInlines.h"
  59  #include "Options.h"
  60  #include <unicode/ubrk.h>
  61  #include <unicode/ucol.h>
  62  #include <unicode/ufieldpositer.h>
  63  #include <unicode/uloc.h>
  64  #include <unicode/unumsys.h>
  65  #include <wtf/Assertions.h>
  66  #include <wtf/Language.h>
  67  #include <wtf/NeverDestroyed.h>
  68  #include <wtf/text/StringBuilder.h>
  69  #include <wtf/text/StringImpl.h>
  70  #include <wtf/unicode/icu/ICUHelpers.h>
  71  
  72  namespace JSC {
  73  
// Compile-time check that IntlObject is trivially destructible.
STATIC_ASSERT_IS_TRIVIALLY_DESTRUCTIBLE(IntlObject);

// Forward declaration of the host function that intlObjectTable registers under
// the name `getCanonicalLocales`. Its definition is not in this part of the file.
static JSC_DECLARE_HOST_FUNCTION(intlObjectFuncGetCanonicalLocales);
  77  
  78  static JSValue createCollatorConstructor(VM& vm, JSObject* object)
  79  {
  80      IntlObject* intlObject = jsCast<IntlObject*>(object);
  81      JSGlobalObject* globalObject = intlObject->globalObject(vm);
  82      return IntlCollatorConstructor::create(vm, IntlCollatorConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlCollatorPrototype*>(globalObject->collatorStructure()->storedPrototypeObject()));
  83  }
  84  
  85  static JSValue createDateTimeFormatConstructor(VM& vm, JSObject* object)
  86  {
  87      IntlObject* intlObject = jsCast<IntlObject*>(object);
  88      JSGlobalObject* globalObject = intlObject->globalObject(vm);
  89      return globalObject->dateTimeFormatConstructor();
  90  }
  91  
  92  static JSValue createDisplayNamesConstructor(VM& vm, JSObject* object)
  93  {
  94      IntlObject* intlObject = jsCast<IntlObject*>(object);
  95      JSGlobalObject* globalObject = intlObject->globalObject(vm);
  96      return IntlDisplayNamesConstructor::create(vm, IntlDisplayNamesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlDisplayNamesPrototype*>(globalObject->displayNamesStructure()->storedPrototypeObject()));
  97  }
  98  
  99  static JSValue createListFormatConstructor(VM& vm, JSObject* object)
 100  {
 101      IntlObject* intlObject = jsCast<IntlObject*>(object);
 102      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 103      return IntlListFormatConstructor::create(vm, IntlListFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlListFormatPrototype*>(globalObject->listFormatStructure()->storedPrototypeObject()));
 104  }
 105  
 106  static JSValue createLocaleConstructor(VM& vm, JSObject* object)
 107  {
 108      IntlObject* intlObject = jsCast<IntlObject*>(object);
 109      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 110      return IntlLocaleConstructor::create(vm, IntlLocaleConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlLocalePrototype*>(globalObject->localeStructure()->storedPrototypeObject()));
 111  }
 112  
 113  static JSValue createNumberFormatConstructor(VM& vm, JSObject* object)
 114  {
 115      IntlObject* intlObject = jsCast<IntlObject*>(object);
 116      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 117      return globalObject->numberFormatConstructor();
 118  }
 119  
 120  static JSValue createPluralRulesConstructor(VM& vm, JSObject* object)
 121  {
 122      IntlObject* intlObject = jsCast<IntlObject*>(object);
 123      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 124      return IntlPluralRulesConstructor::create(vm, IntlPluralRulesConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlPluralRulesPrototype*>(globalObject->pluralRulesStructure()->storedPrototypeObject()));
 125  }
 126  
 127  static JSValue createRelativeTimeFormatConstructor(VM& vm, JSObject* object)
 128  {
 129      IntlObject* intlObject = jsCast<IntlObject*>(object);
 130      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 131      return IntlRelativeTimeFormatConstructor::create(vm, IntlRelativeTimeFormatConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlRelativeTimeFormatPrototype*>(globalObject->relativeTimeFormatStructure()->storedPrototypeObject()));
 132  }
 133  
 134  static JSValue createSegmenterConstructor(VM& vm, JSObject* object)
 135  {
 136      IntlObject* intlObject = jsCast<IntlObject*>(object);
 137      JSGlobalObject* globalObject = intlObject->globalObject(vm);
 138      return IntlSegmenterConstructor::create(vm, IntlSegmenterConstructor::createStructure(vm, globalObject, globalObject->functionPrototype()), jsCast<IntlSegmenterPrototype*>(globalObject->segmenterStructure()->storedPrototypeObject()));
 139  }
 140  
 141  }
 142  
 143  #include "IntlObject.lut.h"
 144  
 145  namespace JSC {
 146  
 147  /* Source for IntlObject.lut.h
 148  @begin intlObjectTable
 149    getCanonicalLocales   intlObjectFuncGetCanonicalLocales            DontEnum|Function 1
 150    Collator              createCollatorConstructor                    DontEnum|PropertyCallback
 151    DateTimeFormat        createDateTimeFormatConstructor              DontEnum|PropertyCallback
 152    Locale                createLocaleConstructor                      DontEnum|PropertyCallback
 153    NumberFormat          createNumberFormatConstructor                DontEnum|PropertyCallback
 154    PluralRules           createPluralRulesConstructor                 DontEnum|PropertyCallback
 155    RelativeTimeFormat    createRelativeTimeFormatConstructor          DontEnum|PropertyCallback
 156    Segmenter             createSegmenterConstructor                   DontEnum|PropertyCallback
 157  @end
 158  */
 159  
// Plain aggregate of a locale string, an extension string, and an index.
// NOTE(review): not used in this part of the file; presumably produced by the
// locale matchers further down — confirm the exact meaning of each field there.
struct MatcherResult {
    String locale;
    String extension;
    // Defaults to 0. Presumably the offset of `extension` within the requested
    // locale — TODO confirm against the code that fills it in.
    size_t extensionIndex { 0 };
};
 165  
// Class info for IntlObject: class name "Intl", parent class info Base::s_info,
// and intlObjectTable (generated into IntlObject.lut.h) as the static property table.
const ClassInfo IntlObject::s_info = { "Intl", &Base::s_info, &intlObjectTable, nullptr, CREATE_METHOD_TABLE(IntlObject) };
 167  
 168  void UFieldPositionIteratorDeleter::operator()(UFieldPositionIterator* iterator) const
 169  {
 170      if (iterator)
 171          ufieldpositer_close(iterator);
 172  }
 173  
// Forwards the VM and structure to the Base constructor; no other state is
// initialized here.
IntlObject::IntlObject(VM& vm, Structure* structure)
    : Base(vm, structure)
{
}
 178  
 179  IntlObject* IntlObject::create(VM& vm, JSGlobalObject* globalObject, Structure* structure)
 180  {
 181      IntlObject* object = new (NotNull, allocateCell<IntlObject>(vm.heap)) IntlObject(vm, structure);
 182      object->finishCreation(vm, globalObject);
 183      return object;
 184  }
 185  
// Completes initialization of the Intl object after construction.
// Runs Base::finishCreation, installs the toStringTag, and, depending on which
// ICU features are available at build time, eagerly installs the DisplayNames
// and ListFormat constructors as DontEnum properties. The remaining
// constructors are listed in intlObjectTable rather than installed here.
// The JSGlobalObject parameter is unused.
void IntlObject::finishCreation(VM& vm, JSGlobalObject*)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
    JSC_TO_STRING_TAG_WITHOUT_TRANSITION();
#if HAVE(ICU_U_LOCALE_DISPLAY_NAMES)
    putDirectWithoutTransition(vm, vm.propertyNames->DisplayNames, createDisplayNamesConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum));
#else
    // Reference the otherwise-unused static helper; presumably this keeps the
    // build free of unused-function warnings when the feature is off.
    UNUSED_PARAM(&createDisplayNamesConstructor);
#endif
#if HAVE(ICU_U_LIST_FORMATTER)
    putDirectWithoutTransition(vm, vm.propertyNames->ListFormat, createListFormatConstructor(vm, this), static_cast<unsigned>(PropertyAttribute::DontEnum));
#else
    // Same as above, for the ListFormat helper.
    UNUSED_PARAM(&createListFormatConstructor);
#endif
}
 202  
 203  Structure* IntlObject::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
 204  {
 205      return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
 206  }
 207  
// Splits a Unicode locale extension sequence (expected to start with "-u-")
// into its components, returned as views into `extension`:
//   - each leading subtag that appears before the first key, one entry each;
//   - each 2-character key, one entry each;
//   - each key's value as a single entry, which may itself contain '-' when
//     the value is made of several subtags.
// Returns an empty vector when `extension` is shorter than 3 characters.
// Example: "-u-co-phonebk-kf-true" yields "co", "phonebk", "kf", "true".
static Vector<StringView> unicodeExtensionComponents(StringView extension)
{
    // UnicodeExtensionSubtags (extension)
    // https://tc39.github.io/ecma402/#sec-unicodeextensionsubtags

    auto extensionLength = extension.length();
    if (extensionLength < 3)
        return { };

    Vector<StringView> subtags;
    // subtagStart: start of the subtag currently being scanned.
    // valueStart: start of the pending value (everything since the last key or leading subtag).
    size_t subtagStart = 3; // Skip initial -u-.
    size_t valueStart = 3;
    bool isLeading = true;
    for (size_t index = subtagStart; index < extensionLength; ++index) {
        if (extension[index] == '-') {
            if (index - subtagStart == 2) {
                // Tag is a key, first append prior key's value if there is one.
                // The trailing "- 1" drops the '-' that separates the value from this key.
                if (subtagStart - valueStart > 1)
                    subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
                isLeading = false;
            } else if (isLeading) {
                // Leading subtags before first key.
                subtags.append(extension.substring(subtagStart, index - subtagStart));
                valueStart = index + 1;
            }
            // Non-key subtags after the first key are not appended here; they stay
            // part of the pending value that starts at valueStart.
            subtagStart = index + 1;
        }
    }
    if (extensionLength - subtagStart == 2) {
        // Trailing an extension key, first append prior key's value if there is one.
        if (subtagStart - valueStart > 1)
            subtags.append(extension.substring(valueStart, subtagStart - valueStart - 1));
        // The final append below then emits the trailing key itself.
        valueStart = subtagStart;
    }
    // Append final key's value.
    subtags.append(extension.substring(valueStart, extensionLength - valueStart));
    return subtags;
}
 248  
 249  Vector<char, 32> localeIDBufferForLanguageTag(const CString& tag)
 250  {
 251      if (!tag.length())
 252          return { };
 253  
 254      UErrorCode status = U_ZERO_ERROR;
 255      Vector<char, 32> buffer(32);
 256      int32_t parsedLength;
 257      auto bufferLength = uloc_forLanguageTag(tag.data(), buffer.data(), buffer.size(), &parsedLength, &status);
 258      if (needsToGrowToProduceCString(status)) {
 259          // Before ICU 64, there's a chance uloc_forLanguageTag will "buffer overflow" while requesting a *smaller* size.
 260          buffer.resize(bufferLength + 1);
 261          status = U_ZERO_ERROR;
 262          uloc_forLanguageTag(tag.data(), buffer.data(), bufferLength + 1, &parsedLength, &status);
 263      }
 264      if (U_FAILURE(status) || parsedLength != static_cast<int32_t>(tag.length()))
 265          return { };
 266  
 267      ASSERT(buffer.contains('\0'));
 268      return buffer;
 269  }
 270  
// Post-processes a language tag produced by ICU to finish canonicalizing its
// Unicode ("-u-") extension: any value equal to "true" is dropped from the
// extension, while keys, other values and leading subtags are re-emitted as is.
// If the tag has no "-u-" sequence, the input buffer is returned unchanged.
// Otherwise a new buffer is built from: the text before the extension plus "-u",
// the filtered extension subtags, and whatever follows the extension.
Vector<char, 32> canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(Vector<char, 32>&& buffer)
{
    StringView locale(buffer.data(), buffer.size());
    ASSERT(locale.is8Bit());
    size_t extensionIndex = locale.find("-u-");
    if (extensionIndex == notFound)
        return WTFMove(buffer);

    // Since ICU's canonicalization is incomplete, we need to perform some of canonicalization here.
    // Determine where the -u- extension ends: either at the end of the tag, or at the
    // next singleton, i.e. a '-' followed by exactly one character and another '-'.
    size_t extensionLength = locale.length() - extensionIndex;
    size_t end = extensionIndex + 3;
    while (end < locale.length()) {
        end = locale.find('-', end);
        if (end == notFound)
            break;
        // Found another singleton.
        if (end + 2 < locale.length() && locale[end + 2] == '-') {
            extensionLength = end - extensionIndex;
            break;
        }
        end++;
    }

    Vector<char, 32> result;
    result.append(buffer.data(), extensionIndex + 2); // "-u" is included.
    StringView extension = locale.substring(extensionIndex, extensionLength);
    ASSERT(extension.is8Bit());
    auto subtags = unicodeExtensionComponents(extension);
    // Note: the loop advances `index` itself; there is no increment in the for header.
    for (unsigned index = 0; index < subtags.size();) {
        auto subtag = subtags[index];
        ASSERT(subtag.is8Bit());
        result.append('-');
        result.append(subtag.characters8(), subtag.length());

        // Anything that is not 2 characters long is not a key; it has already been copied above.
        if (subtag.length() != 2) {
            ++index;
            continue;
        }
        ASSERT(subtag.length() == 2);

        // This is unicode extension key.
        unsigned valueIndexStart = index + 1;
        unsigned valueIndexEnd = valueIndexStart;
        for (; valueIndexEnd < subtags.size(); ++valueIndexEnd) {
            if (subtags[valueIndexEnd].length() == 2)
                break;
        }
        // [valueIndexStart, valueIndexEnd) is value of this unicode extension. If there is no value, valueIndexStart == valueIndexEnd.

        for (unsigned valueIndex = valueIndexStart; valueIndex < valueIndexEnd; ++valueIndex) {
            auto value = subtags[valueIndex];
            // A value of "true" is omitted, leaving just the key.
            if (value != "true"_s) {
                result.append('-');
                result.append(value.characters8(), value.length());
            }
        }
        index = valueIndexEnd;
    }

    // Copy everything after the -u- extension (e.g. further singleton extensions) verbatim.
    unsigned remainingStart = extensionIndex + extensionLength;
    unsigned remainingLength = buffer.size() - remainingStart;
    result.append(buffer.data() + remainingStart, remainingLength);
    return result;
}
 335  
 336  String languageTagForLocaleID(const char* localeID, bool isImmortal)
 337  {
 338      Vector<char, 32> buffer;
 339      auto status = callBufferProducingFunction(uloc_toLanguageTag, localeID, buffer, false);
 340      if (U_FAILURE(status))
 341          return String();
 342  
 343      auto createResult = [&](Vector<char, 32>&& buffer) -> String {
 344          // This is used to store into static variables that may be shared across JSC execution threads.
 345          // This must be immortal to make concurrent ref/deref safe.
 346          if (isImmortal)
 347              return StringImpl::createStaticStringImpl(buffer.data(), buffer.size());
 348          return String(buffer.data(), buffer.size());
 349      };
 350  
 351      return createResult(canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization(WTFMove(buffer)));
 352  }
 353  
 354  // Ensure we have xx-ZZ whenever we have xx-Yyyy-ZZ.
 355  static void addScriptlessLocaleIfNeeded(HashSet<String>& availableLocales, StringView locale)
 356  {
 357      if (locale.length() < 10)
 358          return;
 359  
 360      Vector<StringView, 3> subtags;
 361      for (auto subtag : locale.split('-')) {
 362          if (subtags.size() == 3)
 363              return;
 364          subtags.append(subtag);
 365      }
 366  
 367      if (subtags.size() != 3 || subtags[1].length() != 4 || subtags[2].length() > 3)
 368          return;
 369  
 370      Vector<char, 12> buffer;
 371      ASSERT(subtags[0].is8Bit() && subtags[0].isAllASCII());
 372      buffer.append(reinterpret_cast<const char*>(subtags[0].characters8()), subtags[0].length());
 373      buffer.append('-');
 374      ASSERT(subtags[2].is8Bit() && subtags[2].isAllASCII());
 375      buffer.append(reinterpret_cast<const char*>(subtags[2].characters8()), subtags[2].length());
 376  
 377      availableLocales.add(StringImpl::createStaticStringImpl(buffer.data(), buffer.size()));
 378  }
 379  
 380  const HashSet<String>& intlAvailableLocales()
 381  {
 382      static LazyNeverDestroyed<HashSet<String>> availableLocales;
 383      static std::once_flag initializeOnce;
 384      std::call_once(initializeOnce, [&] {
 385          availableLocales.construct();
 386          ASSERT(availableLocales->isEmpty());
 387          constexpr bool isImmortal = true;
 388          int32_t count = uloc_countAvailable();
 389          for (int32_t i = 0; i < count; ++i) {
 390              String locale = languageTagForLocaleID(uloc_getAvailable(i), isImmortal);
 391              if (locale.isEmpty())
 392                  continue;
 393              availableLocales->add(locale);
 394              addScriptlessLocaleIfNeeded(availableLocales.get(), locale);
 395          }
 396      });
 397      return availableLocales;
 398  }
 399  
 400  // This table is total ordering indexes for ASCII characters in UCA DUCET.
 401  // It is generated from CLDR common/uca/allkeys_DUCET.txt.
 402  //
// A rough overview of UCA is as follows.
 404  // https://unicode.org/reports/tr10/#Main_Algorithm
 405  //
 406  //     1. Normalize each input string.
 407  //
 408  //     2. Produce an array of collation elements for each string.
 409  //
 410  //         There are 3 (or 4) levels. And each character has 4 weights. We concatenate them into one sequence called collation elements.
 411  //         For example, "c" has `[.0706.0020.0002]`. And "ca◌́b" becomes `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`
 412  //         We need to consider variable weighting (https://unicode.org/reports/tr10/#Variable_Weighting), but if it is Non-ignorable, we can just use
 413  //         the collation elements defined in the table.
 414  //
 415  //     3. Produce a sort key for each string from the arrays of collation elements.
 416  //
//         Generate the sort key from the collation elements. We collect weights level by level, from lower levels to higher levels, skipping 0000 weights.
//         Between levels, we insert a 0000 weight to mark the boundary.
 419  //
 420  //             string: "ca◌́b"
 421  //             collation elements: `[.0706.0020.0002], [.06D9.0020.0002], [.0000.0021.0002], [.06EE.0020.0002]`
 422  //             sort key: `0706 06D9 06EE 0000 0020 0020 0021 0020 0000 0002 0002 0002 0002`
 423  //                                        ^                        ^
 424  //                                        level boundary           level boundary
 425  //
 426  //     4. Compare the two sort keys with a binary comparison operation.
 427  //
// The key observations are as follows.
 429  //
 430  //     1. If an input is an ASCII string, UCA step-1 normalization does nothing.
//     2. If an input is an ASCII string, non-starters (https://unicode.org/reports/tr10/#UTS10-D33) do not exist. So no special handling in UCA step-2 is required.
 432  //     3. If an input is an ASCII string, no multiple character collation elements exist. So no special handling in UCA step-2 is required. For example, "L·" is not ASCII.
 433  //     4. UCA step-3 handles 0000 weighted characters specially. And ASCII contains these characters. But 0000 elements are used only for rare control characters.
 434  //        We can ignore this special handling if ASCII strings do not include control characters.
//     5. Except for the 0000 cases, all characters' level-1 weights are different. And level-2 weights are always 0020, which is lower than any level-1 weight.
//        This means that the binary comparison in UCA step-4 does not need to check weights at level 2 and beyond.
 437  //
//  Based on the above observations, our fast path handles ASCII strings excluding control characters. The following weights are recomputed from the level-1 weights.
// Indexed by ASCII code (0-127), eight entries per row.
// Entries of 0 appear only at control-character indexes (0x00-0x08, 0x0E-0x1F, 0x7F).
const uint8_t ducetWeights[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x07
    0, 1, 2, 3, 4, 5, 0, 0, // 0x08-0x0F (0x09-0x0D carry weights 1-5)
    0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x17
    0, 0, 0, 0, 0, 0, 0, 0, // 0x18-0x1F
    6, 12, 16, 28, 38, 29, 27, 15, // 0x20-0x27 (starts at space)
    17, 18, 24, 32, 9, 8, 14, 25, // 0x28-0x2F
    39, 40, 41, 42, 43, 44, 45, 46, // 0x30-0x37 ('0'-'7')
    47, 48, 11, 10, 33, 34, 35, 13, // 0x38-0x3F ('8', '9', then punctuation)
    23, 50, 52, 54, 56, 58, 60, 62, // 0x40-0x47 ('@', 'A'-'G')
    64, 66, 68, 70, 72, 74, 76, 78, // 0x48-0x4F ('H'-'O')
    80, 82, 84, 86, 88, 90, 92, 94, // 0x50-0x57 ('P'-'W')
    96, 98, 100, 19, 26, 20, 31, 7, // 0x58-0x5F ('X'-'Z', then punctuation)
    30, 49, 51, 53, 55, 57, 59, 61, // 0x60-0x67 ('`', 'a'-'g'; each lowercase letter is one below its uppercase form)
    63, 65, 67, 69, 71, 73, 75, 77, // 0x68-0x6F ('h'-'o')
    79, 81, 83, 85, 87, 89, 91, 93, // 0x70-0x77 ('p'-'w')
    95, 97, 99, 21, 36, 22, 37, 0, // 0x78-0x7F ('x'-'z', punctuation, DEL)
};
 457  
 458  const HashSet<String>& intlCollatorAvailableLocales()
 459  {
 460      static LazyNeverDestroyed<HashSet<String>> availableLocales;
 461      static std::once_flag initializeOnce;
 462      std::call_once(initializeOnce, [&] {
 463          availableLocales.construct();
 464          ASSERT(availableLocales->isEmpty());
 465          constexpr bool isImmortal = true;
 466          int32_t count = ucol_countAvailable();
 467          for (int32_t i = 0; i < count; ++i) {
 468              String locale = languageTagForLocaleID(ucol_getAvailable(i), isImmortal);
 469              if (locale.isEmpty())
 470                  continue;
 471              availableLocales->add(locale);
 472              addScriptlessLocaleIfNeeded(availableLocales.get(), locale);
 473          }
 474          IntlCollator::checkICULocaleInvariants(availableLocales.get());
 475      });
 476      return availableLocales;
 477  }
 478  
 479  const HashSet<String>& intlSegmenterAvailableLocales()
 480  {
 481      static NeverDestroyed<HashSet<String>> cachedAvailableLocales;
 482      HashSet<String>& availableLocales = cachedAvailableLocales.get();
 483  
 484      static std::once_flag initializeOnce;
 485      std::call_once(initializeOnce, [&] {
 486          ASSERT(availableLocales.isEmpty());
 487          constexpr bool isImmortal = true;
 488          int32_t count = ubrk_countAvailable();
 489          for (int32_t i = 0; i < count; ++i) {
 490              String locale = languageTagForLocaleID(ubrk_getAvailable(i), isImmortal);
 491              if (locale.isEmpty())
 492                  continue;
 493              availableLocales.add(locale);
 494              addScriptlessLocaleIfNeeded(availableLocales, locale);
 495          }
 496      });
 497      return availableLocales;
 498  }
 499  
 500  // https://tc39.es/ecma402/#sec-getoption
 501  TriState intlBooleanOption(JSGlobalObject* globalObject, JSValue options, PropertyName property)
 502  {
 503      VM& vm = globalObject->vm();
 504      auto scope = DECLARE_THROW_SCOPE(vm);
 505  
 506      if (options.isUndefined())
 507          return TriState::Indeterminate;
 508  
 509      JSObject* opts = options.toObject(globalObject);
 510      RETURN_IF_EXCEPTION(scope, TriState::Indeterminate);
 511  
 512      JSValue value = opts->get(globalObject, property);
 513      RETURN_IF_EXCEPTION(scope, TriState::Indeterminate);
 514  
 515      if (value.isUndefined())
 516          return TriState::Indeterminate;
 517  
 518      return triState(value.toBoolean(globalObject));
 519  }
 520  
 521  String intlStringOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, std::initializer_list<const char*> values, const char* notFound, const char* fallback)
 522  {
 523      // GetOption (options, property, type="string", values, fallback)
 524      // https://tc39.github.io/ecma402/#sec-getoption
 525  
 526      VM& vm = globalObject->vm();
 527      auto scope = DECLARE_THROW_SCOPE(vm);
 528  
 529      if (options.isUndefined())
 530          return fallback;
 531  
 532      JSObject* opts = options.toObject(globalObject);
 533      RETURN_IF_EXCEPTION(scope, String());
 534  
 535      JSValue value = opts->get(globalObject, property);
 536      RETURN_IF_EXCEPTION(scope, String());
 537  
 538      if (!value.isUndefined()) {
 539          String stringValue = value.toWTFString(globalObject);
 540          RETURN_IF_EXCEPTION(scope, String());
 541  
 542          if (values.size() && std::find(values.begin(), values.end(), stringValue) == values.end()) {
 543              throwException(globalObject, scope, createRangeError(globalObject, notFound));
 544              return { };
 545          }
 546          return stringValue;
 547      }
 548  
 549      return fallback;
 550  }
 551  
 552  unsigned intlNumberOption(JSGlobalObject* globalObject, JSValue options, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
 553  {
 554      // GetNumberOption (options, property, minimum, maximum, fallback)
 555      // https://tc39.github.io/ecma402/#sec-getnumberoption
 556  
 557      VM& vm = globalObject->vm();
 558      auto scope = DECLARE_THROW_SCOPE(vm);
 559  
 560      if (options.isUndefined())
 561          return fallback;
 562  
 563      JSObject* opts = options.toObject(globalObject);
 564      RETURN_IF_EXCEPTION(scope, 0);
 565  
 566      JSValue value = opts->get(globalObject, property);
 567      RETURN_IF_EXCEPTION(scope, 0);
 568  
 569      RELEASE_AND_RETURN(scope, intlDefaultNumberOption(globalObject, value, property, minimum, maximum, fallback));
 570  }
 571  
 572  unsigned intlDefaultNumberOption(JSGlobalObject* globalObject, JSValue value, PropertyName property, unsigned minimum, unsigned maximum, unsigned fallback)
 573  {
 574      // DefaultNumberOption (value, minimum, maximum, fallback)
 575      // https://tc39.github.io/ecma402/#sec-defaultnumberoption
 576  
 577      VM& vm = globalObject->vm();
 578      auto scope = DECLARE_THROW_SCOPE(vm);
 579  
 580      if (!value.isUndefined()) {
 581          double doubleValue = value.toNumber(globalObject);
 582          RETURN_IF_EXCEPTION(scope, 0);
 583  
 584          if (!(doubleValue >= minimum && doubleValue <= maximum)) {
 585              throwException(globalObject, scope, createRangeError(globalObject, *property.publicName() + " is out of range"));
 586              return 0;
 587          }
 588          return static_cast<unsigned>(doubleValue);
 589      }
 590      return fallback;
 591  }
 592  
 593  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier
 594  bool isUnicodeLocaleIdentifierType(StringView string)
 595  {
 596      ASSERT(!string.isNull());
 597  
 598      for (auto part : string.splitAllowingEmptyEntries('-')) {
 599          auto length = part.length();
 600          if (length < 3 || length > 8)
 601              return false;
 602  
 603          for (auto character : part.codeUnits()) {
 604              if (!isASCIIAlphanumeric(character))
 605                  return false;
 606          }
 607      }
 608  
 609      return true;
 610  }
 611  
 612  // https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid
 613  static String canonicalizeLanguageTag(const CString& tag)
 614  {
 615      auto buffer = localeIDBufferForLanguageTag(tag);
 616      if (buffer.isEmpty())
 617          return String();
 618  
 619      return languageTagForLocaleID(buffer.data());
 620  }
 621  
 622  Vector<String> canonicalizeLocaleList(JSGlobalObject* globalObject, JSValue locales)
 623  {
 624      // CanonicalizeLocaleList (locales)
 625      // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
 626  
 627      VM& vm = globalObject->vm();
 628      auto scope = DECLARE_THROW_SCOPE(vm);
 629  
 630      Vector<String> seen;
 631  
 632      if (locales.isUndefined())
 633          return seen;
 634  
 635      JSObject* localesObject;
 636      if (locales.isString() || locales.inherits<IntlLocale>(vm)) {
 637          JSArray* localesArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous));
 638          if (!localesArray) {
 639              throwOutOfMemoryError(globalObject, scope);
 640              return { };
 641          }
 642          localesArray->push(globalObject, locales);
 643          RETURN_IF_EXCEPTION(scope, Vector<String>());
 644  
 645          localesObject = localesArray;
 646      } else {
 647          localesObject = locales.toObject(globalObject);
 648          RETURN_IF_EXCEPTION(scope, Vector<String>());
 649      }
 650  
 651      // 6. Let len be ToLength(Get(O, "length")).
 652      JSValue lengthProperty = localesObject->get(globalObject, vm.propertyNames->length);
 653      RETURN_IF_EXCEPTION(scope, Vector<String>());
 654  
 655      uint64_t length = static_cast<uint64_t>(lengthProperty.toLength(globalObject));
 656      RETURN_IF_EXCEPTION(scope, Vector<String>());
 657  
 658      HashSet<String> seenSet;
 659      for (uint64_t k = 0; k < length; ++k) {
 660          bool kPresent = localesObject->hasProperty(globalObject, k);
 661          RETURN_IF_EXCEPTION(scope, Vector<String>());
 662  
 663          if (kPresent) {
 664              JSValue kValue = localesObject->get(globalObject, k);
 665              RETURN_IF_EXCEPTION(scope, Vector<String>());
 666  
 667              if (!kValue.isString() && !kValue.isObject()) {
 668                  throwTypeError(globalObject, scope, "locale value must be a string or object"_s);
 669                  return { };
 670              }
 671  
 672              String tag;
 673              if (kValue.inherits<IntlLocale>(vm))
 674                  tag = jsCast<IntlLocale*>(kValue)->toString();
 675              else {
 676                  JSString* string = kValue.toString(globalObject);
 677                  RETURN_IF_EXCEPTION(scope, Vector<String>());
 678  
 679                  tag = string->value(globalObject);
 680                  RETURN_IF_EXCEPTION(scope, Vector<String>());
 681              }
 682  
 683              if (isStructurallyValidLanguageTag(tag)) {
 684                  ASSERT(tag.isAllASCII());
 685                  String canonicalizedTag = canonicalizeLanguageTag(tag.ascii());
 686                  if (!canonicalizedTag.isNull()) {
 687                      if (seenSet.add(canonicalizedTag).isNewEntry)
 688                          seen.append(canonicalizedTag);
 689                      continue;
 690                  }
 691              }
 692  
 693              String errorMessage = tryMakeString("invalid language tag: ", tag);
 694              if (UNLIKELY(!errorMessage)) {
 695                  throwException(globalObject, scope, createOutOfMemoryError(globalObject));
 696                  return { };
 697              }
 698              throwException(globalObject, scope, createRangeError(globalObject, errorMessage));
 699              return { };
 700          }
 701      }
 702  
 703      return seen;
 704  }
 705  
 706  String bestAvailableLocale(const HashSet<String>& availableLocales, const String& locale)
 707  {
 708      return bestAvailableLocale(locale, [&](const String& candidate) {
 709          return availableLocales.contains(candidate);
 710      });
 711  }
 712  
 713  String defaultLocale(JSGlobalObject* globalObject)
 714  {
 715      // DefaultLocale ()
 716      // https://tc39.github.io/ecma402/#sec-defaultlocale
 717  
 718      // WebCore's global objects will have their own ideas of how to determine the language. It may
 719      // be determined by WebCore-specific logic like some WK settings. Usually this will return the
 720      // same thing as userPreferredLanguages()[0].
 721      if (auto defaultLanguage = globalObject->globalObjectMethodTable()->defaultLanguage) {
 722          String locale = canonicalizeLanguageTag(defaultLanguage().utf8());
 723          if (!locale.isEmpty())
 724              return locale;
 725      }
 726  
 727      Vector<String> languages = userPreferredLanguages();
 728      for (const auto& language : languages) {
 729          String locale = canonicalizeLanguageTag(language.utf8());
 730          if (!locale.isEmpty())
 731              return locale;
 732      }
 733  
 734      // If all else fails, ask ICU. It will probably say something bogus like en_us even if the user
 735      // has configured some other language, but being wrong is better than crashing.
 736      static LazyNeverDestroyed<String> icuDefaultLocalString;
 737      static std::once_flag initializeOnce;
 738      std::call_once(initializeOnce, [&] {
 739          constexpr bool isImmortal = true;
 740          icuDefaultLocalString.construct(languageTagForLocaleID(uloc_getDefault(), isImmortal));
 741      });
 742      if (!icuDefaultLocalString->isEmpty())
 743          return icuDefaultLocalString.get();
 744  
 745      return "en"_s;
 746  }
 747  
 748  String removeUnicodeLocaleExtension(const String& locale)
 749  {
 750      Vector<String> parts = locale.split('-');
 751      StringBuilder builder;
 752      size_t partsSize = parts.size();
 753      bool atPrivate = false;
 754      if (partsSize > 0)
 755          builder.append(parts[0]);
 756      for (size_t p = 1; p < partsSize; ++p) {
 757          if (parts[p] == "x")
 758              atPrivate = true;
 759          if (!atPrivate && parts[p] == "u" && p + 1 < partsSize) {
 760              // Skip the u- and anything that follows until another singleton.
 761              // While the next part is part of the unicode extension, skip it.
 762              while (p + 1 < partsSize && parts[p + 1].length() > 1)
 763                  ++p;
 764          } else {
 765              builder.append('-', parts[p]);
 766          }
 767      }
 768      return builder.toString();
 769  }
 770  
 771  static MatcherResult lookupMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
 772  {
 773      // LookupMatcher (availableLocales, requestedLocales)
 774      // https://tc39.github.io/ecma402/#sec-lookupmatcher
 775  
 776      String locale;
 777      String noExtensionsLocale;
 778      String availableLocale;
 779      for (size_t i = 0; i < requestedLocales.size() && availableLocale.isNull(); ++i) {
 780          locale = requestedLocales[i];
 781          noExtensionsLocale = removeUnicodeLocaleExtension(locale);
 782          availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
 783      }
 784  
 785      MatcherResult result;
 786      if (!availableLocale.isEmpty()) {
 787          result.locale = availableLocale;
 788          if (locale != noExtensionsLocale) {
 789              size_t extensionIndex = locale.find("-u-");
 790              RELEASE_ASSERT(extensionIndex != notFound);
 791  
 792              size_t extensionLength = locale.length() - extensionIndex;
 793              size_t end = extensionIndex + 3;
 794              while (end < locale.length()) {
 795                  end = locale.find('-', end);
 796                  if (end == notFound)
 797                      break;
 798                  if (end + 2 < locale.length() && locale[end + 2] == '-') {
 799                      extensionLength = end - extensionIndex;
 800                      break;
 801                  }
 802                  end++;
 803              }
 804              result.extension = locale.substring(extensionIndex, extensionLength);
 805              result.extensionIndex = extensionIndex;
 806          }
 807      } else
 808          result.locale = defaultLocale(globalObject);
 809      return result;
 810  }
 811  
 812  static MatcherResult bestFitMatcher(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
 813  {
 814      // BestFitMatcher (availableLocales, requestedLocales)
 815      // https://tc39.github.io/ecma402/#sec-bestfitmatcher
 816  
 817      // FIXME: Implement something better than lookup.
 818      return lookupMatcher(globalObject, availableLocales, requestedLocales);
 819  }
 820  
 821  constexpr ASCIILiteral relevantExtensionKeyString(RelevantExtensionKey key)
 822  {
 823      switch (key) {
 824  #define JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS(lowerName, capitalizedName) \
 825      case RelevantExtensionKey::capitalizedName: \
 826          return #lowerName ""_s;
 827      JSC_INTL_RELEVANT_EXTENSION_KEYS(JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS)
 828  #undef JSC_RETURN_INTL_RELEVANT_EXTENSION_KEYS
 829      }
 830      return ASCIILiteral::null();
 831  }
 832  
 833  ResolvedLocale resolveLocale(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, LocaleMatcher localeMatcher, const ResolveLocaleOptions& options, std::initializer_list<RelevantExtensionKey> relevantExtensionKeys, Vector<String> (*localeData)(const String&, RelevantExtensionKey))
 834  {
 835      // ResolveLocale (availableLocales, requestedLocales, options, relevantExtensionKeys, localeData)
 836      // https://tc39.github.io/ecma402/#sec-resolvelocale
 837  
 838      MatcherResult matcherResult = localeMatcher == LocaleMatcher::Lookup
 839          ? lookupMatcher(globalObject, availableLocales, requestedLocales)
 840          : bestFitMatcher(globalObject, availableLocales, requestedLocales);
 841  
 842      String foundLocale = matcherResult.locale;
 843  
 844      Vector<StringView> extensionSubtags;
 845      if (!matcherResult.extension.isNull())
 846          extensionSubtags = unicodeExtensionComponents(matcherResult.extension);
 847  
 848      ResolvedLocale resolved;
 849      resolved.dataLocale = foundLocale;
 850  
 851      String supportedExtension = "-u"_s;
 852      for (RelevantExtensionKey key : relevantExtensionKeys) {
 853          ASCIILiteral keyString = relevantExtensionKeyString(key);
 854          Vector<String> keyLocaleData = localeData(foundLocale, key);
 855          ASSERT(!keyLocaleData.isEmpty());
 856  
 857          String value = keyLocaleData[0];
 858          String supportedExtensionAddition;
 859  
 860          if (!extensionSubtags.isEmpty()) {
 861              size_t keyPos = extensionSubtags.find(keyString);
 862              if (keyPos != notFound) {
 863                  if (keyPos + 1 < extensionSubtags.size() && extensionSubtags[keyPos + 1].length() > 2) {
 864                      StringView requestedValue = extensionSubtags[keyPos + 1];
 865                      auto dataPos = keyLocaleData.find(requestedValue);
 866                      if (dataPos != notFound) {
 867                          value = keyLocaleData[dataPos];
 868                          supportedExtensionAddition = makeString('-', keyString, '-', value);
 869                      }
 870                  } else if (keyLocaleData.contains("true"_s)) {
 871                      value = "true"_s;
 872                      supportedExtensionAddition = makeString('-', keyString);
 873                  }
 874              }
 875          }
 876  
 877          if (auto optionsValue = options[static_cast<unsigned>(key)]) {
 878              // Undefined should not get added to the options, it won't displace the extension.
 879              // Null will remove the extension.
 880              if ((optionsValue->isNull() || keyLocaleData.contains(*optionsValue)) && *optionsValue != value) {
 881                  value = optionsValue.value();
 882                  supportedExtensionAddition = String();
 883              }
 884          }
 885          resolved.extensions[static_cast<unsigned>(key)] = value;
 886          supportedExtension.append(supportedExtensionAddition);
 887      }
 888  
 889      if (supportedExtension.length() > 2) {
 890          StringView foundLocaleView(foundLocale);
 891          foundLocale = makeString(foundLocaleView.substring(0, matcherResult.extensionIndex), supportedExtension, foundLocaleView.substring(matcherResult.extensionIndex));
 892      }
 893  
 894      resolved.locale = WTFMove(foundLocale);
 895      return resolved;
 896  }
 897  
 898  static JSArray* lookupSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
 899  {
 900      // LookupSupportedLocales (availableLocales, requestedLocales)
 901      // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
 902  
 903      VM& vm = globalObject->vm();
 904      auto scope = DECLARE_THROW_SCOPE(vm);
 905  
 906      size_t len = requestedLocales.size();
 907      JSArray* subset = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithUndecided), 0);
 908      if (!subset) {
 909          throwOutOfMemoryError(globalObject, scope);
 910          return nullptr;
 911      }
 912  
 913      unsigned index = 0;
 914      for (size_t k = 0; k < len; ++k) {
 915          const String& locale = requestedLocales[k];
 916          String noExtensionsLocale = removeUnicodeLocaleExtension(locale);
 917          String availableLocale = bestAvailableLocale(availableLocales, noExtensionsLocale);
 918          if (!availableLocale.isNull()) {
 919              subset->putDirectIndex(globalObject, index++, jsString(vm, locale));
 920              RETURN_IF_EXCEPTION(scope, nullptr);
 921          }
 922      }
 923  
 924      return subset;
 925  }
 926  
 927  static JSArray* bestFitSupportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales)
 928  {
 929      // BestFitSupportedLocales (availableLocales, requestedLocales)
 930      // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
 931  
 932      // FIXME: Implement something better than lookup.
 933      return lookupSupportedLocales(globalObject, availableLocales, requestedLocales);
 934  }
 935  
 936  JSValue supportedLocales(JSGlobalObject* globalObject, const HashSet<String>& availableLocales, const Vector<String>& requestedLocales, JSValue options)
 937  {
 938      // SupportedLocales (availableLocales, requestedLocales, options)
 939      // https://tc39.github.io/ecma402/#sec-supportedlocales
 940  
 941      VM& vm = globalObject->vm();
 942      auto scope = DECLARE_THROW_SCOPE(vm);
 943      String matcher;
 944  
 945      LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
 946      RETURN_IF_EXCEPTION(scope, JSValue());
 947  
 948      if (localeMatcher == LocaleMatcher::BestFit)
 949          RELEASE_AND_RETURN(scope, bestFitSupportedLocales(globalObject, availableLocales, requestedLocales));
 950      RELEASE_AND_RETURN(scope, lookupSupportedLocales(globalObject, availableLocales, requestedLocales));
 951  }
 952  
 953  Vector<String> numberingSystemsForLocale(const String& locale)
 954  {
 955      static LazyNeverDestroyed<Vector<String>> availableNumberingSystems;
 956      static std::once_flag initializeOnce;
 957      std::call_once(initializeOnce, [&] {
 958          availableNumberingSystems.construct();
 959          ASSERT(availableNumberingSystems->isEmpty());
 960          UErrorCode status = U_ZERO_ERROR;
 961          UEnumeration* numberingSystemNames = unumsys_openAvailableNames(&status);
 962          ASSERT(U_SUCCESS(status));
 963  
 964          int32_t resultLength;
 965          // Numbering system names are always ASCII, so use char[].
 966          while (const char* result = uenum_next(numberingSystemNames, &resultLength, &status)) {
 967              ASSERT(U_SUCCESS(status));
 968              auto numsys = unumsys_openByName(result, &status);
 969              ASSERT(U_SUCCESS(status));
 970              // Only support algorithmic if it is the default fot the locale, handled below.
 971              if (!unumsys_isAlgorithmic(numsys))
 972                  availableNumberingSystems->append(String(StringImpl::createStaticStringImpl(result, resultLength)));
 973              unumsys_close(numsys);
 974          }
 975          uenum_close(numberingSystemNames);
 976      });
 977  
 978      UErrorCode status = U_ZERO_ERROR;
 979      UNumberingSystem* defaultSystem = unumsys_open(locale.utf8().data(), &status);
 980      ASSERT(U_SUCCESS(status));
 981      String defaultSystemName(unumsys_getName(defaultSystem));
 982      unumsys_close(defaultSystem);
 983  
 984      Vector<String> numberingSystems({ defaultSystemName });
 985      numberingSystems.appendVector(availableNumberingSystems.get());
 986      return numberingSystems;
 987  }
 988  
 989  // unicode_language_subtag = alpha{2,3} | alpha{5,8} ;
 990  bool isUnicodeLanguageSubtag(StringView string)
 991  {
 992      auto length = string.length();
 993      return length >= 2 && length <= 8 && length != 4 && string.isAllSpecialCharacters<isASCIIAlpha>();
 994  }
 995  
 996  // unicode_script_subtag = alpha{4} ;
 997  bool isUnicodeScriptSubtag(StringView string)
 998  {
 999      return string.length() == 4 && string.isAllSpecialCharacters<isASCIIAlpha>();
1000  }
1001  
1002  // unicode_region_subtag = alpha{2} | digit{3} ;
1003  bool isUnicodeRegionSubtag(StringView string)
1004  {
1005      auto length = string.length();
1006      return (length == 2 && string.isAllSpecialCharacters<isASCIIAlpha>())
1007          || (length == 3 && string.isAllSpecialCharacters<isASCIIDigit>());
1008  }
1009  
1010  // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
1011  bool isUnicodeVariantSubtag(StringView string)
1012  {
1013      auto length = string.length();
1014      if (length >= 5 && length <= 8)
1015          return string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1016      return length == 4 && isASCIIDigit(string[0]) && string.substring(1).isAllSpecialCharacters<isASCIIAlphanumeric>();
1017  }
1018  
1019  using VariantCode = uint64_t;
1020  static VariantCode parseVariantCode(StringView string)
1021  {
1022      ASSERT(isUnicodeVariantSubtag(string));
1023      ASSERT(string.isAllASCII());
1024      ASSERT(string.length() <= 8);
1025      ASSERT(string.length() >= 1);
1026      struct Code {
1027          LChar characters[8] { };
1028      };
1029      static_assert(std::is_unsigned_v<LChar>);
1030      static_assert(sizeof(VariantCode) == sizeof(Code));
1031      Code code { };
1032      for (unsigned index = 0; index < string.length(); ++index)
1033          code.characters[index] = toASCIILower(string[index]);
1034      VariantCode result = bitwise_cast<VariantCode>(code);
1035      ASSERT(result); // Not possible since some characters exist.
1036      ASSERT(result != static_cast<VariantCode>(-1)); // Not possible since all characters are ASCII (not Latin-1).
1037      return result;
1038  }
1039  
1040  static unsigned convertToUnicodeSingletonIndex(UChar singleton)
1041  {
1042      ASSERT(isASCIIAlphanumeric(singleton));
1043      singleton = toASCIILower(singleton);
1044      // 0 - 9 => numeric
1045      // 10 - 35 => alpha
1046      if (isASCIIDigit(singleton))
1047          return singleton - '0';
1048      return (singleton - 'a') + 10;
1049  }
1050  static constexpr unsigned numberOfUnicodeSingletons = 10 + 26; // Digits + Alphabets.
1051  
1052  static bool isUnicodeExtensionAttribute(StringView string)
1053  {
1054      auto length = string.length();
1055      return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1056  }
1057  
1058  static bool isUnicodeExtensionKey(StringView string)
1059  {
1060      return string.length() == 2 && isASCIIAlphanumeric(string[0]) && isASCIIAlpha(string[1]);
1061  }
1062  
1063  static bool isUnicodeExtensionTypeComponent(StringView string)
1064  {
1065      auto length = string.length();
1066      return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1067  }
1068  
1069  static bool isUnicodePUExtensionValue(StringView string)
1070  {
1071      auto length = string.length();
1072      return length >= 1 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1073  }
1074  
1075  static bool isUnicodeOtherExtensionValue(StringView string)
1076  {
1077      auto length = string.length();
1078      return length >= 2 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1079  }
1080  
1081  static bool isUnicodeTKey(StringView string)
1082  {
1083      return string.length() == 2 && isASCIIAlpha(string[0]) && isASCIIDigit(string[1]);
1084  }
1085  
1086  static bool isUnicodeTValueComponent(StringView string)
1087  {
1088      auto length = string.length();
1089      return length >= 3 && length <= 8 && string.isAllSpecialCharacters<isASCIIAlphanumeric>();
1090  }
1091  
1092  // The IsStructurallyValidLanguageTag abstract operation verifies that the locale argument (which must be a String value)
1093  //
1094  //     represents a well-formed "Unicode BCP 47 locale identifier" as specified in Unicode Technical Standard 35 section 3.2,
1095  //     does not include duplicate variant subtags, and
1096  //     does not include duplicate singleton subtags.
1097  //
1098  //  The abstract operation returns true if locale can be generated from the EBNF grammar in section 3.2 of the Unicode Technical Standard 35,
1099  //  starting with unicode_locale_id, and does not contain duplicate variant or singleton subtags (other than as a private use subtag).
1100  //  It returns false otherwise. Terminal value characters in the grammar are interpreted as the Unicode equivalents of the ASCII octet values given.
1101  //
1102  // https://unicode.org/reports/tr35/#Unicode_locale_identifier
1103  class LanguageTagParser {
1104  public:
1105      LanguageTagParser(StringView tag)
1106          : m_range(tag.splitAllowingEmptyEntries('-'))
1107          , m_cursor(m_range.begin())
1108      {
1109          ASSERT(m_cursor != m_range.end());
1110          m_current = *m_cursor;
1111      }
1112  
1113      bool parseUnicodeLocaleId();
1114      bool parseUnicodeLanguageId();
1115  
1116      bool isEOS()
1117      {
1118          return m_cursor == m_range.end();
1119      }
1120  
1121      bool next()
1122      {
1123          if (isEOS())
1124              return false;
1125  
1126          ++m_cursor;
1127          if (isEOS()) {
1128              m_current = StringView();
1129              return false;
1130          }
1131          m_current = *m_cursor;
1132          return true;
1133      }
1134  
1135  private:
1136      bool parseExtensionsAndPUExtensions();
1137  
1138      bool parseUnicodeExtensionAfterPrefix();
1139      bool parseTransformedExtensionAfterPrefix();
1140      bool parseOtherExtensionAfterPrefix();
1141      bool parsePUExtensionAfterPrefix();
1142  
1143      StringView::SplitResult m_range;
1144      StringView::SplitResult::Iterator m_cursor;
1145      StringView m_current;
1146  };
1147  
1148  bool LanguageTagParser::parseUnicodeLocaleId()
1149  {
1150      // unicode_locale_id    = unicode_language_id
1151      //                        extensions*
1152      //                        pu_extensions? ;
1153      ASSERT(!isEOS());
1154      if (!parseUnicodeLanguageId())
1155          return false;
1156      if (isEOS())
1157          return true;
1158      if (!parseExtensionsAndPUExtensions())
1159          return false;
1160      return true;
1161  }
1162  
1163  bool LanguageTagParser::parseUnicodeLanguageId()
1164  {
1165      // unicode_language_id  = unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1166      ASSERT(!isEOS());
1167      if (!isUnicodeLanguageSubtag(m_current))
1168          return false;
1169      if (!next())
1170          return true;
1171  
1172      if (isUnicodeScriptSubtag(m_current)) {
1173          if (!next())
1174              return true;
1175      }
1176  
1177      if (isUnicodeRegionSubtag(m_current)) {
1178          if (!next())
1179              return true;
1180      }
1181  
1182      HashSet<VariantCode> variantCodes;
1183      while (true) {
1184          if (!isUnicodeVariantSubtag(m_current))
1185              return true;
1186          // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
1187          // does not include duplicate variant subtags
1188          if (!variantCodes.add(parseVariantCode(m_current)).isNewEntry)
1189              return false;
1190          if (!next())
1191              return true;
1192      }
1193  }
1194  
1195  bool LanguageTagParser::parseUnicodeExtensionAfterPrefix()
1196  {
1197      // ((sep keyword)+ | (sep attribute)+ (sep keyword)*) ;
1198      //
1199      // keyword = key (sep type)? ;
1200      // key = alphanum alpha ;
1201      // type = alphanum{3,8} (sep alphanum{3,8})* ;
1202      // attribute = alphanum{3,8} ;
1203      ASSERT(!isEOS());
1204      bool isAttributeOrKeyword = false;
1205      if (isUnicodeExtensionAttribute(m_current)) {
1206          isAttributeOrKeyword = true;
1207          while (true) {
1208              if (!isUnicodeExtensionAttribute(m_current))
1209                  break;
1210              if (!next())
1211                  return true;
1212          }
1213      }
1214  
1215      if (isUnicodeExtensionKey(m_current)) {
1216          isAttributeOrKeyword = true;
1217          while (true) {
1218              if (!isUnicodeExtensionKey(m_current))
1219                  break;
1220              if (!next())
1221                  return true;
1222              while (true) {
1223                  if (!isUnicodeExtensionTypeComponent(m_current))
1224                      break;
1225                  if (!next())
1226                      return true;
1227              }
1228          }
1229      }
1230  
1231      if (!isAttributeOrKeyword)
1232          return false;
1233      return true;
1234  }
1235  
1236  bool LanguageTagParser::parseTransformedExtensionAfterPrefix()
1237  {
1238      // ((sep tlang (sep tfield)*) | (sep tfield)+) ;
1239      //
1240      // tlang = unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1241      // tfield = tkey tvalue;
1242      // tkey = alpha digit ;
1243      // tvalue = (sep alphanum{3,8})+ ;
1244      ASSERT(!isEOS());
1245      bool found = false;
1246      if (isUnicodeLanguageSubtag(m_current)) {
1247          found = true;
1248          if (!parseUnicodeLanguageId())
1249              return false;
1250          if (isEOS())
1251              return true;
1252      }
1253  
1254      if (isUnicodeTKey(m_current)) {
1255          found = true;
1256          while (true) {
1257              if (!isUnicodeTKey(m_current))
1258                  break;
1259              if (!next())
1260                  return false;
1261              if (!isUnicodeTValueComponent(m_current))
1262                  return false;
1263              if (!next())
1264                  return true;
1265              while (true) {
1266                  if (!isUnicodeTValueComponent(m_current))
1267                      break;
1268                  if (!next())
1269                      return true;
1270              }
1271          }
1272      }
1273  
1274      return found;
1275  }
1276  
1277  bool LanguageTagParser::parseOtherExtensionAfterPrefix()
1278  {
1279      // (sep alphanum{2,8})+ ;
1280      ASSERT(!isEOS());
1281      if (!isUnicodeOtherExtensionValue(m_current))
1282          return false;
1283      if (!next())
1284          return true;
1285  
1286      while (true) {
1287          if (!isUnicodeOtherExtensionValue(m_current))
1288              return true;
1289          if (!next())
1290              return true;
1291      }
1292  }
1293  
1294  bool LanguageTagParser::parsePUExtensionAfterPrefix()
1295  {
1296      // (sep alphanum{1,8})+ ;
1297      ASSERT(!isEOS());
1298      if (!isUnicodePUExtensionValue(m_current))
1299          return false;
1300      if (!next())
1301          return true;
1302  
1303      while (true) {
1304          if (!isUnicodePUExtensionValue(m_current))
1305              return true;
1306          if (!next())
1307              return true;
1308      }
1309  }
1310  
1311  bool LanguageTagParser::parseExtensionsAndPUExtensions()
1312  {
1313      // unicode_locale_id    = unicode_language_id
1314      //                        extensions*
1315      //                        pu_extensions? ;
1316      //
1317      // extensions = unicode_locale_extensions
1318      //            | transformed_extensions
1319      //            | other_extensions ;
1320      //
1321      // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
1322      ASSERT(!isEOS());
1323      Bitmap<numberOfUnicodeSingletons> singletonsSet { };
1324      while (true) {
1325          if (m_current.length() != 1)
1326              return true;
1327          UChar prefixCode = m_current[0];
1328          if (!isASCIIAlphanumeric(prefixCode))
1329              return true;
1330  
1331          // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
1332          // does not include duplicate singleton subtags.
1333          //
1334          // https://unicode.org/reports/tr35/#Unicode_locale_identifier
1335          // As is often the case, the complete syntactic constraints are not easily captured by ABNF,
1336          // so there is a further condition: There cannot be more than one extension with the same singleton (-a-, …, -t-, -u-, …).
1337          // Note that the private use extension (-x-) must come after all other extensions.
1338          if (singletonsSet.get(convertToUnicodeSingletonIndex(prefixCode)))
1339              return false;
1340          singletonsSet.set(convertToUnicodeSingletonIndex(prefixCode), true);
1341  
1342          switch (prefixCode) {
1343          case 'u':
1344          case 'U': {
1345              // unicode_locale_extensions = sep [uU] ((sep keyword)+ | (sep attribute)+ (sep keyword)*) ;
1346              if (!next())
1347                  return false;
1348              if (!parseUnicodeExtensionAfterPrefix())
1349                  return false;
1350              if (isEOS())
1351                  return true;
1352              break; // Next extension.
1353          }
1354          case 't':
1355          case 'T': {
1356              // transformed_extensions = sep [tT] ((sep tlang (sep tfield)*) | (sep tfield)+) ;
1357              if (!next())
1358                  return false;
1359              if (!parseTransformedExtensionAfterPrefix())
1360                  return false;
1361              if (isEOS())
1362                  return true;
1363              break; // Next extension.
1364          }
1365          case 'x':
1366          case 'X': {
1367              // pu_extensions = sep [xX] (sep alphanum{1,8})+ ;
1368              if (!next())
1369                  return false;
1370              if (!parsePUExtensionAfterPrefix())
1371                  return false;
1372              return true; // If pu_extensions appear, no extensions can follow after that. This must be the end of unicode_locale_id.
1373          }
1374          default: {
1375              // other_extensions = sep [alphanum-[tTuUxX]] (sep alphanum{2,8})+ ;
1376              if (!next())
1377                  return false;
1378              if (!parseOtherExtensionAfterPrefix())
1379                  return false;
1380              if (isEOS())
1381                  return true;
1382              break; // Next extension.
1383          }
1384          }
1385      }
1386  }
1387  
1388  // https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
1389  bool isStructurallyValidLanguageTag(StringView string)
1390  {
1391      LanguageTagParser parser(string);
1392      if (!parser.parseUnicodeLocaleId())
1393          return false;
1394      if (!parser.isEOS())
1395          return false;
1396      return true;
1397  }
1398  
1399  // unicode_language_id, but intersection of BCP47 and UTS35.
1400  // unicode_language_id =
1401  //     | unicode_language_subtag (sep unicode_script_subtag)? (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1402  // https://github.com/tc39/proposal-intl-displaynames/issues/79
1403  bool isUnicodeLanguageId(StringView string)
1404  {
1405      LanguageTagParser parser(string);
1406      if (!parser.parseUnicodeLanguageId())
1407          return false;
1408      if (!parser.isEOS())
1409          return false;
1410      return true;
1411  }
1412  
1413  bool isWellFormedCurrencyCode(StringView currency)
1414  {
1415      return currency.length() == 3 && currency.isAllSpecialCharacters<isASCIIAlpha>();
1416  }
1417  
1418  JSC_DEFINE_HOST_FUNCTION(intlObjectFuncGetCanonicalLocales, (JSGlobalObject* globalObject, CallFrame* callFrame))
1419  {
1420      // Intl.getCanonicalLocales(locales)
1421      // https://tc39.github.io/ecma402/#sec-intl.getcanonicallocales
1422  
1423      VM& vm = globalObject->vm();
1424      auto scope = DECLARE_THROW_SCOPE(vm);
1425  
1426      Vector<String> localeList = canonicalizeLocaleList(globalObject, callFrame->argument(0));
1427      RETURN_IF_EXCEPTION(scope, encodedJSValue());
1428      auto length = localeList.size();
1429  
1430      JSArray* localeArray = JSArray::tryCreate(vm, globalObject->arrayStructureForIndexingTypeDuringAllocation(ArrayWithContiguous), length);
1431      if (!localeArray) {
1432          throwOutOfMemoryError(globalObject, scope);
1433          return encodedJSValue();
1434      }
1435  
1436      for (size_t i = 0; i < length; ++i) {
1437          localeArray->putDirectIndex(globalObject, i, jsString(vm, localeList[i]));
1438          RETURN_IF_EXCEPTION(scope, encodedJSValue());
1439      }
1440      return JSValue::encode(localeArray);
1441  }
1442  
1443  } // namespace JSC