/ runtime / IntlCollator.cpp
IntlCollator.cpp
  1  /*
  2   * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family)
  3   * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com)
  4   * Copyright (C) 2016-2020 Apple Inc. All Rights Reserved.
  5   *
  6   * Redistribution and use in source and binary forms, with or without
  7   * modification, are permitted provided that the following conditions
  8   * are met:
  9   * 1. Redistributions of source code must retain the above copyright
 10   *    notice, this list of conditions and the following disclaimer.
 11   * 2. Redistributions in binary form must reproduce the above copyright
 12   *    notice, this list of conditions and the following disclaimer in the
 13   *    documentation and/or other materials provided with the distribution.
 14   *
 15   * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 16   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 17   * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 18   * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 19   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 20   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 21   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 22   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 23   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 24   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 25   * THE POSSIBILITY OF SUCH DAMAGE.
 26   */
 27  
 28  #include "config.h"
 29  #include "IntlCollator.h"
 30  
 31  #include "IntlObjectInlines.h"
 32  #include "JSBoundFunction.h"
 33  #include "JSCInlines.h"
 34  #include "ObjectConstructor.h"
 35  #include <wtf/HexNumber.h>
 36  
 37  namespace JSC {
 38  
// Class metadata for IntlCollator. The first field is the class name string ("Object"), the second links to the
// metadata of Base, and the last is the method table generated for IntlCollator by CREATE_METHOD_TABLE.
const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) };

namespace IntlCollatorInternal {
// Compile-time switch for the dataLogLnIf() diagnostics emitted in this file.
constexpr bool verbose = false;
}
 44  
 45  IntlCollator* IntlCollator::create(VM& vm, Structure* structure)
 46  {
 47      IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure);
 48      format->finishCreation(vm);
 49      return format;
 50  }
 51  
 52  Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
 53  {
 54      return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
 55  }
 56  
// Constructs the cell by forwarding to the Base constructor. No collator state is set up here;
// the ICU collator and option members are populated by initializeCollator().
IntlCollator::IntlCollator(VM& vm, Structure* structure)
    : Base(vm, structure)
{
}
 61  
// Second construction phase, called from create(): runs Base::finishCreation() and, in assert-enabled
// builds, checks that this object inherits from IntlCollator's ClassInfo.
void IntlCollator::finishCreation(VM& vm)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
}
 67  
 68  void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor)
 69  {
 70      IntlCollator* thisObject = jsCast<IntlCollator*>(cell);
 71      ASSERT_GC_OBJECT_INHERITS(thisObject, info());
 72  
 73      Base::visitChildren(thisObject, visitor);
 74  
 75      visitor.append(thisObject->m_boundCompare);
 76  }
 77  
 78  Vector<String> IntlCollator::sortLocaleData(const String& locale, RelevantExtensionKey key)
 79  {
 80      // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
 81      Vector<String> keyLocaleData;
 82      switch (key) {
 83      case RelevantExtensionKey::Co: {
 84          // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
 85          keyLocaleData.append({ });
 86  
 87          UErrorCode status = U_ZERO_ERROR;
 88          auto enumeration = std::unique_ptr<UEnumeration, ICUDeleter<uenum_close>>(ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status));
 89          if (U_SUCCESS(status)) {
 90              const char* collation;
 91              while ((collation = uenum_next(enumeration.get(), nullptr, &status)) && U_SUCCESS(status)) {
 92                  // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array."
 93                  if (!strcmp(collation, "standard") || !strcmp(collation, "search"))
 94                      continue;
 95  
 96                  // Map keyword values to BCP 47 equivalents.
 97                  if (!strcmp(collation, "dictionary"))
 98                      keyLocaleData.append("dict"_s);
 99                  else if (!strcmp(collation, "gb2312han"))
100                      keyLocaleData.append("gb2312"_s);
101                  else if (!strcmp(collation, "phonebook"))
102                      keyLocaleData.append("phonebk"_s);
103                  else if (!strcmp(collation, "traditional"))
104                      keyLocaleData.append("trad"_s);
105                  else
106                      keyLocaleData.append(collation);
107              }
108          }
109          break;
110      }
111      case RelevantExtensionKey::Kf:
112          keyLocaleData.reserveInitialCapacity(3);
113          keyLocaleData.uncheckedAppend("false"_s);
114          keyLocaleData.uncheckedAppend("lower"_s);
115          keyLocaleData.uncheckedAppend("upper"_s);
116          break;
117      case RelevantExtensionKey::Kn:
118          keyLocaleData.reserveInitialCapacity(2);
119          keyLocaleData.uncheckedAppend("false"_s);
120          keyLocaleData.uncheckedAppend("true"_s);
121          break;
122      default:
123          ASSERT_NOT_REACHED();
124      }
125      return keyLocaleData;
126  }
127  
128  Vector<String> IntlCollator::searchLocaleData(const String&, RelevantExtensionKey key)
129  {
130      // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0)
131      Vector<String> keyLocaleData;
132      switch (key) {
133      case RelevantExtensionKey::Co:
134          // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values."
135          keyLocaleData.reserveInitialCapacity(1);
136          keyLocaleData.append({ });
137          break;
138      case RelevantExtensionKey::Kf:
139          keyLocaleData.reserveInitialCapacity(3);
140          keyLocaleData.uncheckedAppend("false"_s);
141          keyLocaleData.uncheckedAppend("lower"_s);
142          keyLocaleData.uncheckedAppend("upper"_s);
143          break;
144      case RelevantExtensionKey::Kn:
145          keyLocaleData.reserveInitialCapacity(2);
146          keyLocaleData.uncheckedAppend("false"_s);
147          keyLocaleData.uncheckedAppend("true"_s);
148          break;
149      default:
150          ASSERT_NOT_REACHED();
151      }
152      return keyLocaleData;
153  }
154  
// https://tc39.github.io/ecma402/#sec-initializecollator
//
// Initializes this collator from the `locales` and `options` arguments: reads the options, resolves the
// locale, stores the resolved settings in the m_* members, then opens and configures the ICU collator.
//
// Returns early with a pending exception when:
//  - canonicalizing the locale list, converting the options to an object, or reading any option throws;
//  - the "collation" option is not a well-formed Unicode locale identifier type (RangeError);
//  - locale resolution yields an empty locale (TypeError);
//  - ICU fails to open the collator (TypeError).
//
// The options are read in a fixed order (usage, localeMatcher, collation, numeric, caseFirst, then
// sensitivity and ignorePunctuation after locale resolution); keep that order when editing.
void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
    RETURN_IF_EXCEPTION(scope, void());

    // An undefined options value is passed through as-is; anything else is converted to an object first.
    JSValue options = optionsValue;
    if (!optionsValue.isUndefined()) {
        options = optionsValue.toObject(globalObject);
        RETURN_IF_EXCEPTION(scope, void());
    }

    m_usage = intlOption<Usage>(globalObject, options, vm.propertyNames->usage, { { "sort"_s, Usage::Sort }, { "search"_s, Usage::Search } }, "usage must be either \"sort\" or \"search\""_s, Usage::Sort);
    RETURN_IF_EXCEPTION(scope, void());

    // The usage decides which locale data table is consulted during locale resolution.
    auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData;

    // Explicit option values, indexed by RelevantExtensionKey, that are handed to resolveLocale().
    ResolveLocaleOptions localeOptions;

    LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
    RETURN_IF_EXCEPTION(scope, void());

    {
        // "collation" accepts any string here; it is only recorded if it is syntactically well-formed.
        String collation = intlStringOption(globalObject, options, vm.propertyNames->collation, { }, nullptr, nullptr);
        RETURN_IF_EXCEPTION(scope, void());
        if (!collation.isNull()) {
            if (!isUnicodeLocaleIdentifierType(collation)) {
                throwRangeError(globalObject, scope, "collation is not a well-formed collation value"_s);
                return;
            }
            localeOptions[static_cast<unsigned>(RelevantExtensionKey::Co)] = WTFMove(collation);
        }
    }

    // TriState::Indeterminate means the option was not supplied, so no "kn" override is recorded.
    TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric);
    RETURN_IF_EXCEPTION(scope, void());
    if (numeric != TriState::Indeterminate)
        localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kn)] = String(numeric == TriState::True ? "true"_s : "false"_s);

    String caseFirstOption = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr);
    RETURN_IF_EXCEPTION(scope, void());
    if (!caseFirstOption.isNull())
        localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kf)] = caseFirstOption;

    auto& availableLocales = intlCollatorAvailableLocales();
    auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { RelevantExtensionKey::Co, RelevantExtensionKey::Kf, RelevantExtensionKey::Kn }, localeData);

    m_locale = resolved.locale;
    if (m_locale.isEmpty()) {
        throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s);
        return;
    }

    // A null resolved "co" value is reported as "default".
    const String& collation = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Co)];
    m_collation = collation.isNull() ? "default"_s : collation;
    m_numeric = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kn)] == "true"_s;

    // Any resolved "kf" value other than "lower" or "upper" is treated as CaseFirst::False.
    const String& caseFirstString = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kf)];
    if (caseFirstString == "lower")
        m_caseFirst = CaseFirst::Lower;
    else if (caseFirstString == "upper")
        m_caseFirst = CaseFirst::Upper;
    else
        m_caseFirst = CaseFirst::False;

    m_sensitivity = intlOption<Sensitivity>(globalObject, options, vm.propertyNames->sensitivity, { { "base"_s, Sensitivity::Base }, { "accent"_s, Sensitivity::Accent }, { "case"_s, Sensitivity::Case }, { "variant"_s, Sensitivity::Variant } }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\""_s, Sensitivity::Variant);
    RETURN_IF_EXCEPTION(scope, void());

    // Only an explicit true enables ignorePunctuation; both false and "not supplied" leave it off.
    TriState ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation);
    RETURN_IF_EXCEPTION(scope, void());
    m_ignorePunctuation = (ignorePunctuation == TriState::True);

    // UCollator does not offer an option to configure "usage" via ucol_setAttribute. So we need to pass this option via locale.
    CString dataLocaleWithExtensions;
    switch (m_usage) {
    case Usage::Sort:
        if (collation.isNull())
            dataLocaleWithExtensions = resolved.dataLocale.utf8();
        else
            dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-", m_collation).utf8();
        break;
    case Usage::Search:
        // searchLocaleData filters out "co" unicode extension. However, we need to pass "co" to ICU when Usage::Search is specified.
        // So we need to pass "co" unicode extension through locale. Since the other relevant extensions are handled via ucol_setAttribute,
        // we can just use dataLocale
        // Since searchLocaleData filters out "co" unicode extension, "collation" option is just ignored.
        dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-search").utf8();
        break;
    }
    dataLogLnIf(IntlCollatorInternal::verbose, "locale:(", resolved.locale, "),dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")");

    UErrorCode status = U_ZERO_ERROR;
    m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(dataLocaleWithExtensions.data(), &status));
    if (U_FAILURE(status)) {
        throwTypeError(globalObject, scope, "failed to initialize Collator"_s);
        return;
    }

    // Translate sensitivity and caseFirst into ICU attribute values. The starting values below are what
    // Sensitivity::Base and CaseFirst::False use; Sensitivity::Case keeps primary strength and turns the
    // case level on instead of raising the strength.
    UColAttributeValue strength = UCOL_PRIMARY;
    UColAttributeValue caseLevel = UCOL_OFF;
    UColAttributeValue caseFirst = UCOL_OFF;
    switch (m_sensitivity) {
    case Sensitivity::Base:
        break;
    case Sensitivity::Accent:
        strength = UCOL_SECONDARY;
        break;
    case Sensitivity::Case:
        caseLevel = UCOL_ON;
        break;
    case Sensitivity::Variant:
        strength = UCOL_TERTIARY;
        break;
    }
    switch (m_caseFirst) {
    case CaseFirst::False:
        break;
    case CaseFirst::Lower:
        caseFirst = UCOL_LOWER_FIRST;
        break;
    case CaseFirst::Upper:
        caseFirst = UCOL_UPPER_FIRST;
        break;
    }

    // Keep in sync with canDoASCIIUCADUCETComparisonWithUCollator, which checks these attributes.
    // The ucol_setAttribute calls share one status; it is only checked by the ASSERT at the end.
    ucol_setAttribute(m_collator.get(), UCOL_STRENGTH, strength, &status);
    ucol_setAttribute(m_collator.get(), UCOL_CASE_LEVEL, caseLevel, &status);
    ucol_setAttribute(m_collator.get(), UCOL_CASE_FIRST, caseFirst, &status);
    ucol_setAttribute(m_collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status);

    // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be
    // ignored. There is currently no way to ignore only punctuation.
    ucol_setAttribute(m_collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status);

    // "The method is required to return 0 when comparing Strings that are considered canonically
    // equivalent by the Unicode standard."
    ucol_setAttribute(m_collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    ASSERT(U_SUCCESS(status));
}
298  
299  // https://tc39.es/ecma402/#sec-collator-comparestrings
300  JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) const
301  {
302      ASSERT(m_collator);
303  
304      VM& vm = globalObject->vm();
305      auto scope = DECLARE_THROW_SCOPE(vm);
306  
307      UErrorCode status = U_ZERO_ERROR;
308      UCollationResult result = ([&]() -> UCollationResult {
309          if (x.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>() && y.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>()) {
310              if (canDoASCIIUCADUCETComparison()) {
311                  if (x.is8Bit() && y.is8Bit())
312                      return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters8(), y.length());
313                  if (x.is8Bit())
314                      return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters16(), y.length());
315                  if (y.is8Bit())
316                      return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters8(), y.length());
317                  return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters16(), y.length());
318              }
319  
320              if (x.is8Bit() && y.is8Bit())
321                  return ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status);
322          }
323          return ucol_strcoll(m_collator.get(), x.upconvertedCharacters(), x.length(), y.upconvertedCharacters(), y.length());
324      }());
325      if (U_FAILURE(status))
326          return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s));
327      return jsNumber(result);
328  }
329  
// Returns the option string for a Usage value: "sort" or "search".
ASCIILiteral IntlCollator::usageString(Usage usage)
{
    switch (usage) {
    case Usage::Sort:
        return "sort"_s;
    case Usage::Search:
        return "search"_s;
    }
    // Only reachable if usage holds a value that none of the cases above handle.
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
341  
// Returns the option string for a Sensitivity value: "base", "accent", "case", or "variant".
ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity)
{
    switch (sensitivity) {
    case Sensitivity::Base:
        return "base"_s;
    case Sensitivity::Accent:
        return "accent"_s;
    case Sensitivity::Case:
        return "case"_s;
    case Sensitivity::Variant:
        return "variant"_s;
    }
    // Only reachable if sensitivity holds a value that none of the cases above handle.
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
357  
// Returns the option string for a CaseFirst value: "false", "lower", or "upper".
ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst)
{
    switch (caseFirst) {
    case CaseFirst::False:
        return "false"_s;
    case CaseFirst::Lower:
        return "lower"_s;
    case CaseFirst::Upper:
        return "upper"_s;
    }
    // Only reachable if caseFirst holds a value that none of the cases above handle.
    ASSERT_NOT_REACHED();
    return ASCIILiteral::null();
}
371  
372  // https://tc39.es/ecma402/#sec-intl.collator.prototype.resolvedoptions
373  JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) const
374  {
375      VM& vm = globalObject->vm();
376      JSObject* options = constructEmptyObject(globalObject);
377      options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale));
378      options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage)));
379      options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity)));
380      options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation));
381      options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation));
382      options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric));
383      options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst)));
384      return options;
385  }
386  
// Stores the given bound function in m_boundCompare, with this collator as the owner of the reference.
// visitChildren() reports m_boundCompare to the GC visitor.
void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format)
{
    m_boundCompare.set(vm, this, format);
}
391  
392  static bool canDoASCIIUCADUCETComparisonWithUCollator(UCollator& collator)
393  {
394      // Attributes are default ones unless we set. So, non-configured attributes are default ones.
395      static constexpr std::pair<UColAttribute, UColAttributeValue> attributes[] = {
396          { UCOL_FRENCH_COLLATION, UCOL_OFF },
397          { UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE },
398          { UCOL_STRENGTH, UCOL_TERTIARY },
399          { UCOL_CASE_LEVEL, UCOL_OFF },
400          { UCOL_CASE_FIRST, UCOL_OFF },
401          { UCOL_NUMERIC_COLLATION, UCOL_OFF },
402          // We do not check UCOL_NORMALIZATION_MODE status since FCD normalization does nothing for ASCII strings.
403      };
404  
405      for (auto& pair : attributes) {
406          UErrorCode status = U_ZERO_ERROR;
407          auto result = ucol_getAttribute(&collator, pair.first, &status);
408          ASSERT(U_SUCCESS(status));
409          if (result != pair.second)
410              return false;
411      }
412  
413      // Check existence of tailoring rules. If they do not exist, collation algorithm is UCA DUCET.
414      int32_t length = 0;
415      ucol_getRules(&collator, &length);
416      return !length;
417  }
418  
419  bool IntlCollator::updateCanDoASCIIUCADUCETComparison() const
420  {
421      // ICU uses the CLDR root collation order as a default starting point for ordering. (The CLDR root collation is based on the UCA DUCET.)
422      // And customizes this root collation via rules.
423      // The root collation is UCA DUCET and it is code-point comparison if the characters are all ASCII.
424      // http://www.unicode.org/reports/tr10/
425      ASSERT(m_collator);
426      auto checkASCIIUCADUCETComparisonCompatibility = [&] {
427          if (m_usage != Usage::Sort)
428              return false;
429          if (m_collation != "default"_s)
430              return false;
431          if (m_sensitivity != Sensitivity::Variant)
432              return false;
433          if (m_caseFirst != CaseFirst::False)
434              return false;
435          if (m_numeric)
436              return false;
437          if (m_ignorePunctuation)
438              return false;
439          return canDoASCIIUCADUCETComparisonWithUCollator(*m_collator);
440      };
441      bool result = checkASCIIUCADUCETComparisonCompatibility();
442      m_canDoASCIIUCADUCETComparison = triState(result);
443      return result;
444  }
445  
446  #if ASSERT_ENABLED
447  void IntlCollator::checkICULocaleInvariants(const HashSet<String>& locales)
448  {
449      for (auto& locale : locales) {
450          auto checkASCIIOrderingWithDUCET = [](const String& locale, UCollator& collator) {
451              bool allAreGood = true;
452              for (unsigned x = 0; x < 128; ++x) {
453                  for (unsigned y = 0; y < 128; ++y) {
454                      if (canUseASCIIUCADUCETComparison(x) && canUseASCIIUCADUCETComparison(y)) {
455                          UErrorCode status = U_ZERO_ERROR;
456                          UChar xstring[] = { static_cast<UChar>(x), 0 };
457                          UChar ystring[] = { static_cast<UChar>(y), 0 };
458                          auto resultICU = ucol_strcoll(&collator, xstring, 1, ystring, 1);
459                          ASSERT(U_SUCCESS(status));
460                          auto resultJSC = compareASCIIWithUCADUCET(xstring, 1, ystring, 1);
461                          if (resultICU != resultJSC) {
462                              dataLogLn("BAD ", locale, " ", makeString(hex(x)), "(", StringView(xstring, 1), ") <=> ", makeString(hex(y)), "(", StringView(ystring, 1), ") ICU:(", static_cast<int32_t>(resultICU), "),JSC:(", static_cast<int32_t>(resultJSC), ")");
463                              allAreGood = false;
464                          }
465                      }
466                  }
467              }
468              return allAreGood;
469          };
470  
471          UErrorCode status = U_ZERO_ERROR;
472          auto collator = std::unique_ptr<UCollator, ICUDeleter<ucol_close>>(ucol_open(locale.ascii().data(), &status));
473  
474          ASSERT(U_SUCCESS(status));
475          ucol_setAttribute(collator.get(), UCOL_STRENGTH, UCOL_TERTIARY, &status);
476          ASSERT(U_SUCCESS(status));
477          ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, UCOL_OFF, &status);
478          ASSERT(U_SUCCESS(status));
479          ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, UCOL_OFF, &status);
480          ASSERT(U_SUCCESS(status));
481          ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, UCOL_OFF, &status);
482          ASSERT(U_SUCCESS(status));
483          ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, UCOL_DEFAULT, &status);
484          ASSERT(U_SUCCESS(status));
485          ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
486          ASSERT(U_SUCCESS(status));
487  
488          if (!canDoASCIIUCADUCETComparisonWithUCollator(*collator))
489              continue;
490  
491          // This should not have reorder.
492          int32_t length = ucol_getReorderCodes(collator.get(), nullptr, 0, &status);
493          ASSERT(U_SUCCESS(status));
494          ASSERT(!length);
495  
496          // Contractions and Expansions are defined as a rule. If there is no tailoring rule, then they should be UCA DUCET's default.
497  
498          auto ensureNotIncludingASCII = [&](USet& set) {
499              Vector<UChar, 32> buffer;
500              for (int32_t index = 0, count = uset_getItemCount(&set); index < count; ++index) {
501                  // start and end are inclusive.
502                  UChar32 start = 0;
503                  UChar32 end = 0;
504                  auto status = callBufferProducingFunction(uset_getItem, &set, index, &start, &end, buffer);
505                  ASSERT(U_SUCCESS(status));
506                  if (buffer.isEmpty()) {
507                      if (isASCII(start)) {
508                          dataLogLn("BAD ", locale, " including ASCII tailored characters");
509                          CRASH();
510                      }
511                  } else {
512                      if (StringView(buffer.data(), buffer.size()).isAllASCII()) {
513                          dataLogLn("BAD ", locale, " ", String(buffer.data(), buffer.size()), " including ASCII tailored characters");
514                          CRASH();
515                      }
516                  }
517              }
518          };
519  
520          auto contractions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
521          auto expansions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty());
522          ucol_getContractionsAndExpansions(collator.get(), contractions.get(), expansions.get(), true, &status);
523          ASSERT(U_SUCCESS(status));
524  
525          ensureNotIncludingASCII(*contractions);
526          ensureNotIncludingASCII(*expansions);
527  
528          // This locale should not have tailoring.
529          auto tailored = std::unique_ptr<USet, ICUDeleter<uset_close>>(ucol_getTailoredSet(collator.get(), &status));
530          ensureNotIncludingASCII(*tailored);
531  
532          dataLogLnIf(IntlCollatorInternal::verbose, "LOCALE ", locale);
533  
534          ASSERT(checkASCIIOrderingWithDUCET(locale, *collator));
535      }
536  }
537  #endif
538  
539  } // namespace JSC