IntlSegmenter.cpp
1 /* 2 * Copyright (C) 2020 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' 14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS 17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 * THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "IntlSegmenter.h" 28 29 #include "IntlObjectInlines.h" 30 #include "IntlSegments.h" 31 #include "JSCInlines.h" 32 #include "ObjectConstructor.h" 33 34 namespace JSC { 35 36 const ClassInfo IntlSegmenter::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlSegmenter) }; 37 38 IntlSegmenter* IntlSegmenter::create(VM& vm, Structure* structure) 39 { 40 auto* object = new (NotNull, allocateCell<IntlSegmenter>(vm.heap)) IntlSegmenter(vm, structure); 41 object->finishCreation(vm); 42 return object; 43 } 44 45 Structure* IntlSegmenter::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype) 46 { 47 return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info()); 48 } 49 50 IntlSegmenter::IntlSegmenter(VM& vm, Structure* structure) 51 : Base(vm, structure) 52 { 53 } 54 55 void IntlSegmenter::finishCreation(VM& vm) 56 { 57 Base::finishCreation(vm); 58 ASSERT(inherits(vm, info())); 59 } 60 61 // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter 62 void IntlSegmenter::initializeSegmenter(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue) 63 { 64 VM& vm = globalObject->vm(); 65 auto scope = DECLARE_THROW_SCOPE(vm); 66 67 auto requestedLocales = canonicalizeLocaleList(globalObject, locales); 68 RETURN_IF_EXCEPTION(scope, void()); 69 70 JSObject* options; 71 if (optionsValue.isUndefined()) 72 options = constructEmptyObject(vm, globalObject->nullPrototypeObjectStructure()); 73 else { 74 options = optionsValue.toObject(globalObject); 75 RETURN_IF_EXCEPTION(scope, void()); 76 } 77 78 ResolveLocaleOptions localeOptions; 79 80 LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit); 81 RETURN_IF_EXCEPTION(scope, void()); 82 83 auto localeData = [](const String&, RelevantExtensionKey) -> Vector<String> { 84 return { }; 85 }; 86 87 auto& availableLocales = intlSegmenterAvailableLocales(); 88 auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { }, localeData); 89 90 m_locale = resolved.locale; 91 if (m_locale.isEmpty()) { 92 throwTypeError(globalObject, scope, "failed to initialize Segmenter due to invalid locale"_s); 93 return; 94 } 95 96 m_granularity = intlOption<Granularity>(globalObject, options, vm.propertyNames->granularity, { { "grapheme"_s, Granularity::Grapheme }, { "word"_s, Granularity::Word }, { "sentence"_s, Granularity::Sentence } }, "granularity must be either \"grapheme\", \"word\", or \"sentence\""_s, Granularity::Grapheme); 97 RETURN_IF_EXCEPTION(scope, void()); 98 99 UBreakIteratorType type = UBRK_CHARACTER; 100 switch (m_granularity) { 101 case Granularity::Grapheme: 102 type = UBRK_CHARACTER; 103 break; 104 case Granularity::Word: 105 type = UBRK_WORD; 106 break; 107 case Granularity::Sentence: 108 type = UBRK_SENTENCE; 109 break; 110 } 111 112 UErrorCode status = U_ZERO_ERROR; 113 m_segmenter = std::unique_ptr<UBreakIterator, UBreakIteratorDeleter>(ubrk_open(type, m_locale.utf8().data(), nullptr, 0, &status)); 114 if (U_FAILURE(status)) { 115 throwTypeError(globalObject, scope, "failed to initialize Segmenter"_s); 116 return; 117 } 118 } 119 120 // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter.prototype.segment 121 JSValue IntlSegmenter::segment(JSGlobalObject* globalObject, JSValue stringValue) const 122 { 123 VM& vm = globalObject->vm(); 124 auto scope = DECLARE_THROW_SCOPE(vm); 125 126 JSString* jsString = stringValue.toString(globalObject); 127 RETURN_IF_EXCEPTION(scope, { }); 128 String string = jsString->value(globalObject); 129 RETURN_IF_EXCEPTION(scope, { }); 130 auto upconvertedCharacters = Box<Vector<UChar>>::create(string.charactersWithoutNullTermination()); 131 132 UErrorCode status = U_ZERO_ERROR; 133 auto segmenter = std::unique_ptr<UBreakIterator, UBreakIteratorDeleter>(ubrk_safeClone(m_segmenter.get(), nullptr, nullptr, &status)); 134 if (U_FAILURE(status)) { 135 throwTypeError(globalObject, scope, "failed to initialize Segments"_s); 136 return { }; 137 } 138 ubrk_setText(segmenter.get(), upconvertedCharacters->data(), upconvertedCharacters->size(), &status); 139 if (U_FAILURE(status)) { 140 throwTypeError(globalObject, scope, "failed to initialize Segments"_s); 141 return { }; 142 } 143 144 return IntlSegments::create(vm, globalObject->segmentsStructure(), WTFMove(segmenter), WTFMove(upconvertedCharacters), jsString, m_granularity); 145 } 146 147 // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter.prototype.resolvedoptions 148 JSObject* IntlSegmenter::resolvedOptions(JSGlobalObject* globalObject) const 149 { 150 VM& vm = globalObject->vm(); 151 JSObject* options = constructEmptyObject(globalObject); 152 options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale)); 153 options->putDirect(vm, vm.propertyNames->granularity, jsNontrivialString(vm, granularityString(m_granularity))); 154 return options; 155 } 156 157 ASCIILiteral IntlSegmenter::granularityString(Granularity granularity) 158 { 159 switch (granularity) { 160 case Granularity::Grapheme: 161 return "grapheme"_s; 162 case Granularity::Word: 163 return "word"_s; 164 case Granularity::Sentence: 165 return "sentence"_s; 166 } 167 ASSERT_NOT_REACHED(); 168 return ASCIILiteral::null(); 169 } 170 171 JSObject* IntlSegmenter::createSegmentDataObject(JSGlobalObject* globalObject, JSString* string, int32_t startIndex, int32_t endIndex, UBreakIterator& segmenter, Granularity granularity) 172 { 173 VM& vm = globalObject->vm(); 174 JSObject* result = constructEmptyObject(globalObject); 175 result->putDirect(vm, vm.propertyNames->segment, jsSubstring(globalObject, string, startIndex, endIndex - startIndex)); 176 result->putDirect(vm, vm.propertyNames->index, jsNumber(startIndex)); 177 result->putDirect(vm, vm.propertyNames->input, string); 178 if (granularity == IntlSegmenter::Granularity::Word) { 179 int32_t ruleStatus = ubrk_getRuleStatus(&segmenter); 180 result->putDirect(vm, vm.propertyNames->isWordLike, jsBoolean(!(ruleStatus >= UBRK_WORD_NONE && ruleStatus <= UBRK_WORD_NONE_LIMIT))); 181 } 182 return result; 183 } 184 185 } // namespace JSC