/ runtime / IntlSegmenter.cpp
IntlSegmenter.cpp
  1  /*
  2   * Copyright (C) 2020 Apple Inc. All rights reserved.
  3   *
  4   * Redistribution and use in source and binary forms, with or without
  5   * modification, are permitted provided that the following conditions
  6   * are met:
  7   * 1. Redistributions of source code must retain the above copyright
  8   *    notice, this list of conditions and the following disclaimer.
  9   * 2. Redistributions in binary form must reproduce the above copyright
 10   *    notice, this list of conditions and the following disclaimer in the
 11   *    documentation and/or other materials provided with the distribution.
 12   *
 13   * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 14   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 15   * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 16   * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 17   * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 18   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 19   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 20   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 21   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 22   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 23   * THE POSSIBILITY OF SUCH DAMAGE.
 24   */
 25  
 26  #include "config.h"
 27  #include "IntlSegmenter.h"
 28  
 29  #include "IntlObjectInlines.h"
 30  #include "IntlSegments.h"
 31  #include "JSCInlines.h"
 32  #include "ObjectConstructor.h"
 33  
 34  namespace JSC {
 35  
 36  const ClassInfo IntlSegmenter::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(IntlSegmenter) };
 37  
 38  IntlSegmenter* IntlSegmenter::create(VM& vm, Structure* structure)
 39  {
 40      auto* object = new (NotNull, allocateCell<IntlSegmenter>(vm.heap)) IntlSegmenter(vm, structure);
 41      object->finishCreation(vm);
 42      return object;
 43  }
 44  
 45  Structure* IntlSegmenter::createStructure(VM& vm, JSGlobalObject* globalObject, JSValue prototype)
 46  {
 47      return Structure::create(vm, globalObject, prototype, TypeInfo(ObjectType, StructureFlags), info());
 48  }
 49  
 50  IntlSegmenter::IntlSegmenter(VM& vm, Structure* structure)
 51      : Base(vm, structure)
 52  {
 53  }
 54  
 55  void IntlSegmenter::finishCreation(VM& vm)
 56  {
 57      Base::finishCreation(vm);
 58      ASSERT(inherits(vm, info()));
 59  }
 60  
 61  // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter
 62  void IntlSegmenter::initializeSegmenter(JSGlobalObject* globalObject, JSValue locales, JSValue optionsValue)
 63  {
 64      VM& vm = globalObject->vm();
 65      auto scope = DECLARE_THROW_SCOPE(vm);
 66  
 67      auto requestedLocales = canonicalizeLocaleList(globalObject, locales);
 68      RETURN_IF_EXCEPTION(scope, void());
 69  
 70      JSObject* options;
 71      if (optionsValue.isUndefined())
 72          options = constructEmptyObject(vm, globalObject->nullPrototypeObjectStructure());
 73      else {
 74          options = optionsValue.toObject(globalObject);
 75          RETURN_IF_EXCEPTION(scope, void());
 76      }
 77  
 78      ResolveLocaleOptions localeOptions;
 79  
 80      LocaleMatcher localeMatcher = intlOption<LocaleMatcher>(globalObject, options, vm.propertyNames->localeMatcher, { { "lookup"_s, LocaleMatcher::Lookup }, { "best fit"_s, LocaleMatcher::BestFit } }, "localeMatcher must be either \"lookup\" or \"best fit\""_s, LocaleMatcher::BestFit);
 81      RETURN_IF_EXCEPTION(scope, void());
 82  
 83      auto localeData = [](const String&, RelevantExtensionKey) -> Vector<String> {
 84          return { };
 85      };
 86  
 87      auto& availableLocales = intlSegmenterAvailableLocales();
 88      auto resolved = resolveLocale(globalObject, availableLocales, requestedLocales, localeMatcher, localeOptions, { }, localeData);
 89  
 90      m_locale = resolved.locale;
 91      if (m_locale.isEmpty()) {
 92          throwTypeError(globalObject, scope, "failed to initialize Segmenter due to invalid locale"_s);
 93          return;
 94      }
 95  
 96      m_granularity = intlOption<Granularity>(globalObject, options, vm.propertyNames->granularity, { { "grapheme"_s, Granularity::Grapheme }, { "word"_s, Granularity::Word }, { "sentence"_s, Granularity::Sentence } }, "granularity must be either \"grapheme\", \"word\", or \"sentence\""_s, Granularity::Grapheme);
 97      RETURN_IF_EXCEPTION(scope, void());
 98  
 99      UBreakIteratorType type = UBRK_CHARACTER;
100      switch (m_granularity) {
101      case Granularity::Grapheme:
102          type = UBRK_CHARACTER;
103          break;
104      case Granularity::Word:
105          type = UBRK_WORD;
106          break;
107      case Granularity::Sentence:
108          type = UBRK_SENTENCE;
109          break;
110      }
111  
112      UErrorCode status = U_ZERO_ERROR;
113      m_segmenter = std::unique_ptr<UBreakIterator, UBreakIteratorDeleter>(ubrk_open(type, m_locale.utf8().data(), nullptr, 0, &status));
114      if (U_FAILURE(status)) {
115          throwTypeError(globalObject, scope, "failed to initialize Segmenter"_s);
116          return;
117      }
118  }
119  
120  // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter.prototype.segment
121  JSValue IntlSegmenter::segment(JSGlobalObject* globalObject, JSValue stringValue) const
122  {
123      VM& vm = globalObject->vm();
124      auto scope = DECLARE_THROW_SCOPE(vm);
125  
126      JSString* jsString = stringValue.toString(globalObject);
127      RETURN_IF_EXCEPTION(scope, { });
128      String string = jsString->value(globalObject);
129      RETURN_IF_EXCEPTION(scope, { });
130      auto upconvertedCharacters = Box<Vector<UChar>>::create(string.charactersWithoutNullTermination());
131  
132      UErrorCode status = U_ZERO_ERROR;
133      auto segmenter = std::unique_ptr<UBreakIterator, UBreakIteratorDeleter>(ubrk_safeClone(m_segmenter.get(), nullptr, nullptr, &status));
134      if (U_FAILURE(status)) {
135          throwTypeError(globalObject, scope, "failed to initialize Segments"_s);
136          return { };
137      }
138      ubrk_setText(segmenter.get(), upconvertedCharacters->data(), upconvertedCharacters->size(), &status);
139      if (U_FAILURE(status)) {
140          throwTypeError(globalObject, scope, "failed to initialize Segments"_s);
141          return { };
142      }
143  
144      return IntlSegments::create(vm, globalObject->segmentsStructure(), WTFMove(segmenter), WTFMove(upconvertedCharacters), jsString, m_granularity);
145  }
146  
147  // https://tc39.es/proposal-intl-segmenter/#sec-intl.segmenter.prototype.resolvedoptions
148  JSObject* IntlSegmenter::resolvedOptions(JSGlobalObject* globalObject) const
149  {
150      VM& vm = globalObject->vm();
151      JSObject* options = constructEmptyObject(globalObject);
152      options->putDirect(vm, vm.propertyNames->locale, jsString(vm, m_locale));
153      options->putDirect(vm, vm.propertyNames->granularity, jsNontrivialString(vm, granularityString(m_granularity)));
154      return options;
155  }
156  
157  ASCIILiteral IntlSegmenter::granularityString(Granularity granularity)
158  {
159      switch (granularity) {
160      case Granularity::Grapheme:
161          return "grapheme"_s;
162      case Granularity::Word:
163          return "word"_s;
164      case Granularity::Sentence:
165          return "sentence"_s;
166      }
167      ASSERT_NOT_REACHED();
168      return ASCIILiteral::null();
169  }
170  
171  JSObject* IntlSegmenter::createSegmentDataObject(JSGlobalObject* globalObject, JSString* string, int32_t startIndex, int32_t endIndex, UBreakIterator& segmenter, Granularity granularity)
172  {
173      VM& vm = globalObject->vm();
174      JSObject* result = constructEmptyObject(globalObject);
175      result->putDirect(vm, vm.propertyNames->segment, jsSubstring(globalObject, string, startIndex, endIndex - startIndex));
176      result->putDirect(vm, vm.propertyNames->index, jsNumber(startIndex));
177      result->putDirect(vm, vm.propertyNames->input, string);
178      if (granularity == IntlSegmenter::Granularity::Word) {
179          int32_t ruleStatus = ubrk_getRuleStatus(&segmenter);
180          result->putDirect(vm, vm.propertyNames->isWordLike, jsBoolean(!(ruleStatus >= UBRK_WORD_NONE && ruleStatus <= UBRK_WORD_NONE_LIMIT)));
181      }
182      return result;
183  }
184  
185  } // namespace JSC