IntlCollator.cpp
1 /* 2 * Copyright (C) 2015 Andy VanWagoner (andy@vanwagoner.family) 3 * Copyright (C) 2015 Sukolsak Sakshuwong (sukolsak@gmail.com) 4 * Copyright (C) 2016-2020 Apple Inc. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS 19 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 25 * THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "IntlCollator.h" 30 31 #include "IntlObjectInlines.h" 32 #include "JSBoundFunction.h" 33 #include "JSCInlines.h" 34 #include "ObjectConstructor.h" 35 #include <wtf/HexNumber.h> 36 37 namespace JSC { 38 39 const ClassInfo IntlCollator::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlCollator) }; 40 41 namespace IntlCollatorInternal { 42 constexpr bool verbose = false; 43 } 44 45 IntlCollator* IntlCollator::create(VM& vm, Structure* structure) 46 { 47 IntlCollator* format = new (NotNull, allocateCell<IntlCollator>(vm.heap)) IntlCollator(vm, structure); 48 format->finishCreation(vm); 49 return format; 50 } 51 52 Structure* IntlCollator::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) 53 { 54 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); 55 } 56 57 IntlCollator::IntlCollator(VM& vm, Structure* structure) 58 : Base(vm, structure) 59 { 60 } 61 62 void IntlCollator::finishCreation(VM& vm) 63 { 64 Base::finishCreation(vm); 65 ASSERT(inherits(vm, info())); 66 } 67 68 void IntlCollator::visitChildren(JSCell* cell, SlotVisitor& visitor) 69 { 70 IntlCollator* thisObject = jsCast<IntlCollator*>(cell); 71 ASSERT_GC_OBJECT_INHERITS(thisObject, info()); 72 73 Base::visitChildren(thisObject, visitor); 74 75 visitor.append(thisObject->m_boundCompare); 76 } 77 78 Vector<String> IntlCollator::sortLocaleData(const String& locale, RelevantExtensionKey key) 79 { 80 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) 81 Vector<String> keyLocaleData; 82 switch (key) { 83 case RelevantExtensionKey::Co: { 84 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." 85 keyLocaleData.append({ }); 86 87 UErrorCode status = U_ZERO_ERROR; 88 auto enumeration = std::unique_ptr<UEnumeration, ICUDeleter<uenum_close>>(ucol_getKeywordValuesForLocale("collation", locale.utf8().data(), false, &status)); 89 if (U_SUCCESS(status)) { 90 const char* collation; 91 while ((collation = uenum_next(enumeration.get(), nullptr, &status)) && U_SUCCESS(status)) { 92 // 10.2.3 "The values "standard" and "search" must not be used as elements in any [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co array." 93 if (!strcmp(collation, "standard") || !strcmp(collation, "search")) 94 continue; 95 96 // Map keyword values to BCP 47 equivalents. 97 if (!strcmp(collation, "dictionary")) 98 keyLocaleData.append("dict"_s); 99 else if (!strcmp(collation, "gb2312han")) 100 keyLocaleData.append("gb2312"_s); 101 else if (!strcmp(collation, "phonebook")) 102 keyLocaleData.append("phonebk"_s); 103 else if (!strcmp(collation, "traditional")) 104 keyLocaleData.append("trad"_s); 105 else 106 keyLocaleData.append(collation); 107 } 108 } 109 break; 110 } 111 case RelevantExtensionKey::Kf: 112 keyLocaleData.reserveInitialCapacity(3); 113 keyLocaleData.uncheckedAppend("false"_s); 114 keyLocaleData.uncheckedAppend("lower"_s); 115 keyLocaleData.uncheckedAppend("upper"_s); 116 break; 117 case RelevantExtensionKey::Kn: 118 keyLocaleData.reserveInitialCapacity(2); 119 keyLocaleData.uncheckedAppend("false"_s); 120 keyLocaleData.uncheckedAppend("true"_s); 121 break; 122 default: 123 ASSERT_NOT_REACHED(); 124 } 125 return keyLocaleData; 126 } 127 128 Vector<String> IntlCollator::searchLocaleData(const String&, RelevantExtensionKey key) 129 { 130 // 9.1 Internal slots of Service Constructors & 10.2.3 Internal slots (ECMA-402 2.0) 131 Vector<String> keyLocaleData; 132 switch (key) { 133 case RelevantExtensionKey::Co: 134 // 10.2.3 "The first element of [[sortLocaleData]][locale].co and [[searchLocaleData]][locale].co must be null for all locale values." 135 keyLocaleData.reserveInitialCapacity(1); 136 keyLocaleData.append({ }); 137 break; 138 case RelevantExtensionKey::Kf: 139 keyLocaleData.reserveInitialCapacity(3); 140 keyLocaleData.uncheckedAppend("false"_s); 141 keyLocaleData.uncheckedAppend("lower"_s); 142 keyLocaleData.uncheckedAppend("upper"_s); 143 break; 144 case RelevantExtensionKey::Kn: 145 keyLocaleData.reserveInitialCapacity(2); 146 keyLocaleData.uncheckedAppend("false"_s); 147 keyLocaleData.uncheckedAppend("true"_s); 148 break; 149 default: 150 ASSERT_NOT_REACHED(); 151 } 152 return keyLocaleData; 153 } 154 155 // https://tc39.github.io/ecma402/#sec-initializecollator 156 void IntlCollator::initializeCollator(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue) 157 { 158 VM& vm = globalObject->vm(); 159 auto scope = DECLARE_THROW_SCOPE(vm); 160 161 auto requestedLocales = canonicalizeLocaleList(globalObject, locales); 162 RETURN_IF_EXCEPTION(scope, void()); 163 164 JSValue options = optionsValue; 165 if (!optionsValue.isUndefined()) { 166 options = optionsValue.toObject(globalObject); 167 RETURN_IF_EXCEPTION(scope, void()); 168 } 169 170 m_usage = intlOption<Usage>(globalObject, options, vm.propertyNames->usage, { { "sort"_s, Usage::Sort }, { "search"_s, Usage::Search } }, "usage must be either \"sort\" or \"search\""_s, Usage::Sort); 171 RETURN_IF_EXCEPTION(scope, void()); 172 173 auto localeData = (m_usage == Usage::Sort) ? sortLocaleData : searchLocaleData; 174 175 ResolveLocaleOptions localeOptions; 176 177 LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit); 178 RETURN_IF_EXCEPTION(scope, void()); 179 180 { 181 String collation = intlStringOption(globalObject, options, vm.propertyNames->collation, { }, nullptr, nullptr); 182 RETURN_IF_EXCEPTION(scope, void()); 183 if (!collation.isNull()) { 184 if (!isUnicodeLocaleIdentifierType(collation)) { 185 throwRangeError(globalObject, scope, "collation is not a well-formed collation value"_s); 186 return; 187 } 188 localeOptions[static_cast<unsigned>(RelevantExtensionKey::Co)] = WTFMove(collation); 189 } 190 } 191 192 TriState numeric = intlBooleanOption(globalObject, options, vm.propertyNames->numeric); 193 RETURN_IF_EXCEPTION(scope, void()); 194 if (numeric != TriState::Indeterminate) 195 localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kn)] = String(numeric == TriState::True ? "true"_s : "false"_s); 196 197 String caseFirstOption = intlStringOption(globalObject, options, vm.propertyNames->caseFirst, { "upper", "lower", "false" }, "caseFirst must be either \"upper\", \"lower\", or \"false\"", nullptr); 198 RETURN_IF_EXCEPTION(scope, void()); 199 if (!caseFirstOption.isNull()) 200 localeOptions[static_cast<unsigned>(RelevantExtensionKey::Kf)] = caseFirstOption; 201 202 auto& availableLocales = intlCollatorAvailableLocales(); 203 auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { RelevantExtensionKey::Co, RelevantExtensionKey::Kf, RelevantExtensionKey::Kn }, localeData); 204 205 m_locale = resolved.locale; 206 if (m_locale.isEmpty()) { 207 throwTypeError(globalObject, scope, "failed to initialize Collator due to invalid locale"_s); 208 return; 209 } 210 211 const String& collation = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Co)]; 212 m_collation = collation.isNull() ? "default"_s : collation; 213 m_numeric = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kn)] == "true"_s; 214 215 const String& caseFirstString = resolved.extensions[static_cast<unsigned>(RelevantExtensionKey::Kf)]; 216 if (caseFirstString == "lower") 217 m_caseFirst = CaseFirst::Lower; 218 else if (caseFirstString == "upper") 219 m_caseFirst = CaseFirst::Upper; 220 else 221 m_caseFirst = CaseFirst::False; 222 223 m_sensitivity = intlOption<Sensitivity>(globalObject, options, vm.propertyNames->sensitivity, { { "base"_s, Sensitivity::Base }, { "accent"_s, Sensitivity::Accent }, { "case"_s, Sensitivity::Case }, { "variant"_s, Sensitivity::Variant } }, "sensitivity must be either \"base\", \"accent\", \"case\", or \"variant\""_s, Sensitivity::Variant); 224 RETURN_IF_EXCEPTION(scope, void()); 225 226 TriState ignorePunctuation = intlBooleanOption(globalObject, options, vm.propertyNames->ignorePunctuation); 227 RETURN_IF_EXCEPTION(scope, void()); 228 m_ignorePunctuation = (ignorePunctuation == TriState::True); 229 230 // UCollator does not offer an option to configure "usage" via ucol_setAttribute. So we need to pass this option via locale. 231 CString dataLocaleWithExtensions; 232 switch (m_usage) { 233 case Usage::Sort: 234 if (collation.isNull()) 235 dataLocaleWithExtensions = resolved.dataLocale.utf8(); 236 else 237 dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-", m_collation).utf8(); 238 break; 239 case Usage::Search: 240 // searchLocaleData filters out "co" unicode extension. However, we need to pass "co" to ICU when Usage::Search is specified. 241 // So we need to pass "co" unicode extension through locale. Since the other relevant extensions are handled via ucol_setAttribute, 242 // we can just use dataLocale 243 // Since searchLocaleData filters out "co" unicode extension, "collation" option is just ignored. 244 dataLocaleWithExtensions = makeString(resolved.dataLocale, "-u-co-search").utf8(); 245 break; 246 } 247 dataLogLnIf(IntlCollatorInternal::verbose, "locale:(", resolved.locale, "),dataLocaleWithExtensions:(", dataLocaleWithExtensions, ")"); 248 249 UErrorCode status = U_ZERO_ERROR; 250 m_collator = std::unique_ptr<UCollator, UCollatorDeleter>(ucol_open(dataLocaleWithExtensions.data(), &status)); 251 if (U_FAILURE(status)) { 252 throwTypeError(globalObject, scope, "failed to initialize Collator"_s); 253 return; 254 } 255 256 UColAttributeValue strength = UCOL_PRIMARY; 257 UColAttributeValue caseLevel = UCOL_OFF; 258 UColAttributeValue caseFirst = UCOL_OFF; 259 switch (m_sensitivity) { 260 case Sensitivity::Base: 261 break; 262 case Sensitivity::Accent: 263 strength = UCOL_SECONDARY; 264 break; 265 case Sensitivity::Case: 266 caseLevel = UCOL_ON; 267 break; 268 case Sensitivity::Variant: 269 strength = UCOL_TERTIARY; 270 break; 271 } 272 switch (m_caseFirst) { 273 case CaseFirst::False: 274 break; 275 case CaseFirst::Lower: 276 caseFirst = UCOL_LOWER_FIRST; 277 break; 278 case CaseFirst::Upper: 279 caseFirst = UCOL_UPPER_FIRST; 280 break; 281 } 282 283 // Keep in sync with canDoASCIIUCADUCETComparisonSlow about used attributes. 284 ucol_setAttribute(m_collator.get(), UCOL_STRENGTH, strength, &status); 285 ucol_setAttribute(m_collator.get(), UCOL_CASE_LEVEL, caseLevel, &status); 286 ucol_setAttribute(m_collator.get(), UCOL_CASE_FIRST, caseFirst, &status); 287 ucol_setAttribute(m_collator.get(), UCOL_NUMERIC_COLLATION, m_numeric ? UCOL_ON : UCOL_OFF, &status); 288 289 // FIXME: Setting UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED causes punctuation and whitespace to be 290 // ignored. There is currently no way to ignore only punctuation. 291 ucol_setAttribute(m_collator.get(), UCOL_ALTERNATE_HANDLING, m_ignorePunctuation ? UCOL_SHIFTED : UCOL_DEFAULT, &status); 292 293 // "The method is required to return 0 when comparing Strings that are considered canonically 294 // equivalent by the Unicode standard." 295 ucol_setAttribute(m_collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 296 ASSERT(U_SUCCESS(status)); 297 } 298 299 // https://tc39.es/ecma402/#sec-collator-comparestrings 300 JSValue IntlCollator::compareStrings(JSGlobalObject* globalObject, StringView x, StringView y) const 301 { 302 ASSERT(m_collator); 303 304 VM& vm = globalObject->vm(); 305 auto scope = DECLARE_THROW_SCOPE(vm); 306 307 UErrorCode status = U_ZERO_ERROR; 308 UCollationResult result = ([&]() -> UCollationResult { 309 if (x.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>() && y.isAllSpecialCharacters<canUseASCIIUCADUCETComparison>()) { 310 if (canDoASCIIUCADUCETComparison()) { 311 if (x.is8Bit() && y.is8Bit()) 312 return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters8(), y.length()); 313 if (x.is8Bit()) 314 return compareASCIIWithUCADUCET(x.characters8(), x.length(), y.characters16(), y.length()); 315 if (y.is8Bit()) 316 return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters8(), y.length()); 317 return compareASCIIWithUCADUCET(x.characters16(), x.length(), y.characters16(), y.length()); 318 } 319 320 if (x.is8Bit() && y.is8Bit()) 321 return ucol_strcollUTF8(m_collator.get(), bitwise_cast<const char*>(x.characters8()), x.length(), bitwise_cast<const char*>(y.characters8()), y.length(), &status); 322 } 323 return ucol_strcoll(m_collator.get(), x.upconvertedCharacters(), x.length(), y.upconvertedCharacters(), y.length()); 324 }()); 325 if (U_FAILURE(status)) 326 return throwException(globalObject, scope, createError(globalObject, "Failed to compare strings."_s)); 327 return jsNumber(result); 328 } 329 330 ASCIILiteral IntlCollator::usageString(Usage usage) 331 { 332 switch (usage) { 333 case Usage::Sort: 334 return "sort"_s; 335 case Usage::Search: 336 return "search"_s; 337 } 338 ASSERT_NOT_REACHED(); 339 return ASCIILiteral::null(); 340 } 341 342 ASCIILiteral IntlCollator::sensitivityString(Sensitivity sensitivity) 343 { 344 switch (sensitivity) { 345 case Sensitivity::Base: 346 return "base"_s; 347 case Sensitivity::Accent: 348 return "accent"_s; 349 case Sensitivity::Case: 350 return "case"_s; 351 case Sensitivity::Variant: 352 return "variant"_s; 353 } 354 ASSERT_NOT_REACHED(); 355 return ASCIILiteral::null(); 356 } 357 358 ASCIILiteral IntlCollator::caseFirstString(CaseFirst caseFirst) 359 { 360 switch (caseFirst) { 361 case CaseFirst::False: 362 return "false"_s; 363 case CaseFirst::Lower: 364 return "lower"_s; 365 case CaseFirst::Upper: 366 return "upper"_s; 367 } 368 ASSERT_NOT_REACHED(); 369 return ASCIILiteral::null(); 370 } 371 372 // https://tc39.es/ecma402/#sec-intl.collator.prototype.resolvedoptions 373 JSObject* IntlCollator::resolvedOptions(JSGlobalObject* globalObject) const 374 { 375 VM& vm = globalObject->vm(); 376 JSObject* options = constructEmptyObject(globalObject); 377 options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale)); 378 options->putDirect(vm, vm.propertyNames->usage, jsNontrivialString(vm, usageString(m_usage))); 379 options->putDirect(vm, vm.propertyNames->sensitivity, jsNontrivialString(vm, sensitivityString(m_sensitivity))); 380 options->putDirect(vm, vm.propertyNames->ignorePunctuation, jsBoolean(m_ignorePunctuation)); 381 options->putDirect(vm, vm.propertyNames->collation, jsString(vm, m_collation)); 382 options->putDirect(vm, vm.propertyNames->numeric, jsBoolean(m_numeric)); 383 options->putDirect(vm, vm.propertyNames->caseFirst, jsNontrivialString(vm, caseFirstString(m_caseFirst))); 384 return options; 385 } 386 387 void IntlCollator::setBoundCompare(VM& vm, JSBoundFunction* format) 388 { 389 m_boundCompare.set(vm, this, format); 390 } 391 392 static bool canDoASCIIUCADUCETComparisonWithUCollator(UCollator& collator) 393 { 394 // Attributes are default ones unless we set. So, non-configured attributes are default ones. 395 static constexpr std::pair<UColAttribute, UColAttributeValue> attributes[] = { 396 { UCOL_FRENCH_COLLATION, UCOL_OFF }, 397 { UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE }, 398 { UCOL_STRENGTH, UCOL_TERTIARY }, 399 { UCOL_CASE_LEVEL, UCOL_OFF }, 400 { UCOL_CASE_FIRST, UCOL_OFF }, 401 { UCOL_NUMERIC_COLLATION, UCOL_OFF }, 402 // We do not check UCOL_NORMALIZATION_MODE status since FCD normalization does nothing for ASCII strings. 403 }; 404 405 for (auto& pair : attributes) { 406 UErrorCode status = U_ZERO_ERROR; 407 auto result = ucol_getAttribute(&collator, pair.first, &status); 408 ASSERT(U_SUCCESS(status)); 409 if (result != pair.second) 410 return false; 411 } 412 413 // Check existence of tailoring rules. If they do not exist, collation algorithm is UCA DUCET. 414 int32_t length = 0; 415 ucol_getRules(&collator, &length); 416 return !length; 417 } 418 419 bool IntlCollator::updateCanDoASCIIUCADUCETComparison() const 420 { 421 // ICU uses the CLDR root collation order as a default starting point for ordering. (The CLDR root collation is based on the UCA DUCET.) 422 // And customizes this root collation via rules. 423 // The root collation is UCA DUCET and it is code-point comparison if the characters are all ASCII. 424 // http://www.unicode.org/reports/tr10/ 425 ASSERT(m_collator); 426 auto checkASCIIUCADUCETComparisonCompatibility = [&] { 427 if (m_usage != Usage::Sort) 428 return false; 429 if (m_collation != "default"_s) 430 return false; 431 if (m_sensitivity != Sensitivity::Variant) 432 return false; 433 if (m_caseFirst != CaseFirst::False) 434 return false; 435 if (m_numeric) 436 return false; 437 if (m_ignorePunctuation) 438 return false; 439 return canDoASCIIUCADUCETComparisonWithUCollator(*m_collator); 440 }; 441 bool result = checkASCIIUCADUCETComparisonCompatibility(); 442 m_canDoASCIIUCADUCETComparison = triState(result); 443 return result; 444 } 445 446 #if ASSERT_ENABLED 447 void IntlCollator::checkICULocaleInvariants(const HashSet<String>& locales) 448 { 449 for (auto& locale : locales) { 450 auto checkASCIIOrderingWithDUCET = [](const String& locale, UCollator& collator) { 451 bool allAreGood = true; 452 for (unsigned x = 0; x < 128; ++x) { 453 for (unsigned y = 0; y < 128; ++y) { 454 if (canUseASCIIUCADUCETComparison(x) && canUseASCIIUCADUCETComparison(y)) { 455 UErrorCode status = U_ZERO_ERROR; 456 UChar xstring[] = { static_cast<UChar>(x), 0 }; 457 UChar ystring[] = { static_cast<UChar>(y), 0 }; 458 auto resultICU = ucol_strcoll(&collator, xstring, 1, ystring, 1); 459 ASSERT(U_SUCCESS(status)); 460 auto resultJSC = compareASCIIWithUCADUCET(xstring, 1, ystring, 1); 461 if (resultICU != resultJSC) { 462 dataLogLn("BAD ", locale, " ", makeString(hex(x)), "(", StringView(xstring, 1), ") <=> ", makeString(hex(y)), "(", StringView(ystring, 1), ") ICU:(", static_cast<int32_t>(resultICU), "),JSC:(", static_cast<int32_t>(resultJSC), ")"); 463 allAreGood = false; 464 } 465 } 466 } 467 } 468 return allAreGood; 469 }; 470 471 UErrorCode status = U_ZERO_ERROR; 472 auto collator = std::unique_ptr<UCollator, ICUDeleter<ucol_close>>(ucol_open(locale.ascii().data(), &status)); 473 474 ASSERT(U_SUCCESS(status)); 475 ucol_setAttribute(collator.get(), UCOL_STRENGTH, UCOL_TERTIARY, &status); 476 ASSERT(U_SUCCESS(status)); 477 ucol_setAttribute(collator.get(), UCOL_CASE_LEVEL, UCOL_OFF, &status); 478 ASSERT(U_SUCCESS(status)); 479 ucol_setAttribute(collator.get(), UCOL_CASE_FIRST, UCOL_OFF, &status); 480 ASSERT(U_SUCCESS(status)); 481 ucol_setAttribute(collator.get(), UCOL_NUMERIC_COLLATION, UCOL_OFF, &status); 482 ASSERT(U_SUCCESS(status)); 483 ucol_setAttribute(collator.get(), UCOL_ALTERNATE_HANDLING, UCOL_DEFAULT, &status); 484 ASSERT(U_SUCCESS(status)); 485 ucol_setAttribute(collator.get(), UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 486 ASSERT(U_SUCCESS(status)); 487 488 if (!canDoASCIIUCADUCETComparisonWithUCollator(*collator)) 489 continue; 490 491 // This should not have reorder. 492 int32_t length = ucol_getReorderCodes(collator.get(), nullptr, 0, &status); 493 ASSERT(U_SUCCESS(status)); 494 ASSERT(!length); 495 496 // Contractions and Expansions are defined as a rule. If there is no tailoring rule, then they should be UCA DUCET's default. 497 498 auto ensureNotIncludingASCII = [&](USet& set) { 499 Vector<UChar, 32> buffer; 500 for (int32_t index = 0, count = uset_getItemCount(&set); index < count; ++index) { 501 // start and end are inclusive. 502 UChar32 start = 0; 503 UChar32 end = 0; 504 auto status = callBufferProducingFunction(uset_getItem, &set, index, &start, &end, buffer); 505 ASSERT(U_SUCCESS(status)); 506 if (buffer.isEmpty()) { 507 if (isASCII(start)) { 508 dataLogLn("BAD ", locale, " including ASCII tailored characters"); 509 CRASH(); 510 } 511 } else { 512 if (StringView(buffer.data(), buffer.size()).isAllASCII()) { 513 dataLogLn("BAD ", locale, " ", String(buffer.data(), buffer.size()), " including ASCII tailored characters"); 514 CRASH(); 515 } 516 } 517 } 518 }; 519 520 auto contractions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty()); 521 auto expansions = std::unique_ptr<USet, ICUDeleter<uset_close>>(uset_openEmpty()); 522 ucol_getContractionsAndExpansions(collator.get(), contractions.get(), expansions.get(), true, &status); 523 ASSERT(U_SUCCESS(status)); 524 525 ensureNotIncludingASCII(*contractions); 526 ensureNotIncludingASCII(*expansions); 527 528 // This locale should not have tailoring. 529 auto tailored = std::unique_ptr<USet, ICUDeleter<uset_close>>(ucol_getTailoredSet(collator.get(), &status)); 530 ensureNotIncludingASCII(*tailored); 531 532 dataLogLnIf(IntlCollatorInternal::verbose, "LOCALE ", locale); 533 534 ASSERT(checkASCIIOrderingWithDUCET(locale, *collator)); 535 } 536 } 537 #endif 538 539 } // namespace JSC