/ runtime / RegExpPrototype.cpp
RegExpPrototype.cpp
  1  /*
  2   *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
  3   *  Copyright (C) 2003-2020 Apple Inc. All Rights Reserved.
  4   *
  5   *  This library is free software; you can redistribute it and/or
  6   *  modify it under the terms of the GNU Lesser General Public
  7   *  License as published by the Free Software Foundation; either
  8   *  version 2 of the License, or (at your option) any later version.
  9   *
 10   *  This library is distributed in the hope that it will be useful,
 11   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13   *  Lesser General Public License for more details.
 14   *
 15   *  You should have received a copy of the GNU Lesser General Public
 16   *  License along with this library; if not, write to the Free Software
 17   *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 18   *
 19   */
 20  
 21  #include "config.h"
 22  #include "RegExpPrototype.h"
 23  
 24  #include "IntegrityInlines.h"
 25  #include "JSArray.h"
 26  #include "JSCBuiltins.h"
 27  #include "JSCJSValue.h"
 28  #include "JSGlobalObject.h"
 29  #include "JSStringInlines.h"
 30  #include "Lexer.h"
 31  #include "RegExpObject.h"
 32  #include "RegExpObjectInlines.h"
 33  #include "StringRecursionChecker.h"
 34  #include "YarrFlags.h"
 35  #include <wtf/text/StringBuilder.h>
 36  
 37  namespace JSC {
 38  
// Forward declarations of the host functions that finishCreation() below installs
// on RegExp.prototype (three methods followed by the accessor getters).
static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncExec);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncCompile);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncToString);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterGlobal);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterIgnoreCase);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterMultiline);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterDotAll);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterSticky);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterUnicode);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterSource);
static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterFlags);
 50  
// Class metadata for RegExpPrototype. It is initialized with the string "Object" and
// chains to Base::s_info.
// NOTE(review): the first initializer is presumably the reported class name - confirm against ClassInfo.
const ClassInfo RegExpPrototype::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(RegExpPrototype) };
 52  
// Forwards the VM and structure to the JSNonFinalObject base class. No properties are
// installed here; that happens in finishCreation().
RegExpPrototype::RegExpPrototype(VM& vm, Structure* structure)
    : JSNonFinalObject(vm, structure)
{
}
 57  
// Installs the RegExp.prototype members on this object after the base class has
// finished its own creation. Every member is registered with the DontEnum attribute.
// NOTE(review): |globalObject| is not named in this body; presumably the registration
// macros expand to code that uses it - confirm before renaming or removing it.
void RegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObject)
{
    Base::finishCreation(vm);
    ASSERT(inherits(vm, info()));
    // Methods implemented natively in this file. The trailing integer is presumably the
    // function's "length" - TODO confirm against the macro definition.
    JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->compile, regExpProtoFuncCompile, static_cast<unsigned>(PropertyAttribute::DontEnum), 2);
    JSC_NATIVE_INTRINSIC_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->exec, regExpProtoFuncExec, static_cast<unsigned>(PropertyAttribute::DontEnum), 1, RegExpExecIntrinsic);
    JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->toString, regExpProtoFuncToString, static_cast<unsigned>(PropertyAttribute::DontEnum), 0);
    // Accessor properties backed by the native getters defined in this file.
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->global, regExpProtoGetterGlobal, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->dotAll, regExpProtoGetterDotAll, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->ignoreCase, regExpProtoGetterIgnoreCase, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->multiline, regExpProtoGetterMultiline, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->sticky, regExpProtoGetterSticky, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->unicode, regExpProtoGetterUnicode, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->source, regExpProtoGetterSource, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->flags, regExpProtoGetterFlags, PropertyAttribute::DontEnum | PropertyAttribute::Accessor);
    // Symbol-keyed methods and test(), registered from builtin code generators rather
    // than from host functions in this file.
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchSymbol, regExpPrototypeMatchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchAllSymbol, regExpPrototypeMatchAllCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->replaceSymbol, regExpPrototypeReplaceCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->searchSymbol, regExpPrototypeSearchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->splitSymbol, regExpPrototypeSplitCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
    JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->test, regExpPrototypeTestCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum));
}
 80  
 81  // ------------------------------ Functions ---------------------------
 82  
 83  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncTestFast, (JSGlobalObject* globalObject, CallFrame* callFrame))
 84  {
 85      VM& vm = globalObject->vm();
 86      auto scope = DECLARE_THROW_SCOPE(vm);
 87  
 88      JSValue thisValue = callFrame->thisValue();
 89      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
 90      if (UNLIKELY(!regexp))
 91          return throwVMTypeError(globalObject, scope);
 92      JSString* string = callFrame->argument(0).toStringOrNull(globalObject);
 93      EXCEPTION_ASSERT(!!scope.exception() == !string);
 94      if (!string)
 95          return JSValue::encode(jsUndefined());
 96      RELEASE_AND_RETURN(scope, JSValue::encode(jsBoolean(regexp->test(globalObject, string))));
 97  }
 98  
 99  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncExec, (JSGlobalObject* globalObject, CallFrame* callFrame))
100  {
101      VM& vm = globalObject->vm();
102      auto scope = DECLARE_THROW_SCOPE(vm);
103  
104      JSValue thisValue = callFrame->thisValue();
105      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
106      if (UNLIKELY(!regexp))
107          return throwVMTypeError(globalObject, scope, "Builtin RegExp exec can only be called on a RegExp object");
108      JSString* string = callFrame->argument(0).toStringOrNull(globalObject);
109      EXCEPTION_ASSERT(!!scope.exception() == !string);
110      if (!string)
111          return JSValue::encode(jsUndefined());
112      RELEASE_AND_RETURN(scope, JSValue::encode(regexp->exec(globalObject, string)));
113  }
114  
115  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncMatchFast, (JSGlobalObject* globalObject, CallFrame* callFrame))
116  {
117      RegExpObject* thisObject = jsCast<RegExpObject*>(callFrame->thisValue());
118      JSString* string = jsCast<JSString*>(callFrame->uncheckedArgument(0));
119      if (!thisObject->regExp()->global())
120          return JSValue::encode(thisObject->exec(globalObject, string));
121      return JSValue::encode(thisObject->matchGlobal(globalObject, string));
122  }
123  
124  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncCompile, (JSGlobalObject* globalObject, CallFrame* callFrame))
125  {
126      VM& vm = globalObject->vm();
127      auto scope = DECLARE_THROW_SCOPE(vm);
128  
129      JSValue thisValue = callFrame->thisValue();
130      auto* thisRegExp = jsDynamicCast<RegExpObject*>(vm, thisValue);
131      if (UNLIKELY(!thisRegExp))
132          return throwVMTypeError(globalObject, scope);
133  
134      RegExp* regExp;
135      JSValue arg0 = callFrame->argument(0);
136      JSValue arg1 = callFrame->argument(1);
137      
138      if (auto* regExpObject = jsDynamicCast<RegExpObject*>(vm, arg0)) {
139          if (!arg1.isUndefined())
140              return throwVMTypeError(globalObject, scope, "Cannot supply flags when constructing one RegExp from another."_s);
141          regExp = regExpObject->regExp();
142      } else {
143          String pattern = arg0.isUndefined() ? emptyString() : arg0.toWTFString(globalObject);
144          RETURN_IF_EXCEPTION(scope, encodedJSValue());
145  
146          auto flags = arg1.isUndefined() ? makeOptional(OptionSet<Yarr::Flags> { }) : Yarr::parseFlags(arg1.toWTFString(globalObject));
147          RETURN_IF_EXCEPTION(scope, encodedJSValue());
148          if (!flags)
149              return throwVMError(globalObject, scope, createSyntaxError(globalObject, "Invalid flags supplied to RegExp constructor."_s));
150  
151          regExp = RegExp::create(vm, pattern, flags.value());
152      }
153  
154      if (!regExp->isValid())
155          return throwVMError(globalObject, scope, regExp->errorToThrow(globalObject));
156  
157      thisRegExp->setRegExp(vm, regExp);
158      scope.release();
159      thisRegExp->setLastIndex(globalObject, 0);
160      return JSValue::encode(thisRegExp);
161  }
162  
163  typedef std::array<char, 6 + 1> FlagsString; // 6 different flags and a null character terminator.
164  
165  static inline FlagsString flagsString(JSGlobalObject* globalObject, JSObject* regexp)
166  {
167      FlagsString string;
168      string[0] = 0;
169  
170      VM& vm = globalObject->vm();
171      auto scope = DECLARE_THROW_SCOPE(vm);
172  
173      JSValue globalValue = regexp->get(globalObject, vm.propertyNames->global);
174      RETURN_IF_EXCEPTION(scope, string);
175      JSValue ignoreCaseValue = regexp->get(globalObject, vm.propertyNames->ignoreCase);
176      RETURN_IF_EXCEPTION(scope, string);
177      JSValue multilineValue = regexp->get(globalObject, vm.propertyNames->multiline);
178      RETURN_IF_EXCEPTION(scope, string);
179      JSValue dotAllValue = regexp->get(globalObject, vm.propertyNames->dotAll);
180      RETURN_IF_EXCEPTION(scope, string);
181      JSValue unicodeValue = regexp->get(globalObject, vm.propertyNames->unicode);
182      RETURN_IF_EXCEPTION(scope, string);
183      JSValue stickyValue = regexp->get(globalObject, vm.propertyNames->sticky);
184      RETURN_IF_EXCEPTION(scope, string);
185  
186      unsigned index = 0;
187      if (globalValue.toBoolean(globalObject))
188          string[index++] = 'g';
189      if (ignoreCaseValue.toBoolean(globalObject))
190          string[index++] = 'i';
191      if (multilineValue.toBoolean(globalObject))
192          string[index++] = 'm';
193      if (dotAllValue.toBoolean(globalObject))
194          string[index++] = 's';
195      if (unicodeValue.toBoolean(globalObject))
196          string[index++] = 'u';
197      if (stickyValue.toBoolean(globalObject))
198          string[index++] = 'y';
199      ASSERT(index < string.size());
200      string[index] = 0;
201      return string;
202  }
203  
204  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncToString, (JSGlobalObject* globalObject, CallFrame* callFrame))
205  {
206      VM& vm = globalObject->vm();
207      auto scope = DECLARE_THROW_SCOPE(vm);
208  
209      JSValue thisValue = callFrame->thisValue().toThis(globalObject, ECMAMode::strict());
210      if (!thisValue.isObject())
211          return throwVMTypeError(globalObject, scope);
212  
213      JSObject* thisObject = asObject(thisValue);
214      Integrity::auditStructureID(vm, thisObject->structureID());
215  
216      StringRecursionChecker checker(globalObject, thisObject);
217      EXCEPTION_ASSERT(!scope.exception() || checker.earlyReturnValue());
218      if (JSValue earlyReturnValue = checker.earlyReturnValue())
219          return JSValue::encode(earlyReturnValue);
220  
221      JSValue sourceValue = thisObject->get(globalObject, vm.propertyNames->source);
222      RETURN_IF_EXCEPTION(scope, encodedJSValue());
223      String source = sourceValue.toWTFString(globalObject);
224      RETURN_IF_EXCEPTION(scope, encodedJSValue());
225  
226      JSValue flagsValue = thisObject->get(globalObject, vm.propertyNames->flags);
227      RETURN_IF_EXCEPTION(scope, encodedJSValue());
228      String flags = flagsValue.toWTFString(globalObject);
229      RETURN_IF_EXCEPTION(scope, encodedJSValue());
230  
231      RELEASE_AND_RETURN(scope, JSValue::encode(jsMakeNontrivialString(globalObject, '/', source, '/', flags)));
232  }
233  
234  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterGlobal, (JSGlobalObject* globalObject, CallFrame* callFrame))
235  {
236      VM& vm = globalObject->vm();
237      auto scope = DECLARE_THROW_SCOPE(vm);
238  
239      JSValue thisValue = callFrame->thisValue();
240      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
241      if (UNLIKELY(!regexp)) {
242          if (thisValue == globalObject->regExpPrototype())
243              return JSValue::encode(jsUndefined());
244          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.global getter can only be called on a RegExp object"_s);
245      }
246  
247      return JSValue::encode(jsBoolean(regexp->regExp()->global()));
248  }
249  
250  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterIgnoreCase, (JSGlobalObject* globalObject, CallFrame* callFrame))
251  {
252      VM& vm = globalObject->vm();
253      auto scope = DECLARE_THROW_SCOPE(vm);
254  
255      JSValue thisValue = callFrame->thisValue();
256      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
257      if (UNLIKELY(!regexp)) {
258          if (thisValue == globalObject->regExpPrototype())
259              return JSValue::encode(jsUndefined());
260          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.ignoreCase getter can only be called on a RegExp object"_s);
261      }
262  
263      return JSValue::encode(jsBoolean(regexp->regExp()->ignoreCase()));
264  }
265  
266  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterMultiline, (JSGlobalObject* globalObject, CallFrame* callFrame))
267  {
268      VM& vm = globalObject->vm();
269      auto scope = DECLARE_THROW_SCOPE(vm);
270  
271      JSValue thisValue = callFrame->thisValue();
272      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
273      if (UNLIKELY(!regexp)) {
274          if (thisValue == globalObject->regExpPrototype())
275              return JSValue::encode(jsUndefined());
276          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.multiline getter can only be called on a RegExp object"_s);
277      }
278  
279      return JSValue::encode(jsBoolean(regexp->regExp()->multiline()));
280  }
281  
282  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterDotAll, (JSGlobalObject* globalObject, CallFrame* callFrame))
283  {
284      VM& vm = globalObject->vm();
285      auto scope = DECLARE_THROW_SCOPE(vm);
286      
287      JSValue thisValue = callFrame->thisValue();
288      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
289      if (UNLIKELY(!regexp)) {
290          if (thisValue == globalObject->regExpPrototype())
291              return JSValue::encode(jsUndefined());
292          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.dotAll getter can only be called on a RegExp object"_s);
293      }
294      
295      return JSValue::encode(jsBoolean(regexp->regExp()->dotAll()));
296  }
297      
298  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterSticky, (JSGlobalObject* globalObject, CallFrame* callFrame))
299  {
300      VM& vm = globalObject->vm();
301      auto scope = DECLARE_THROW_SCOPE(vm);
302  
303      JSValue thisValue = callFrame->thisValue();
304      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
305      if (UNLIKELY(!regexp)) {
306          if (thisValue == globalObject->regExpPrototype())
307              return JSValue::encode(jsUndefined());
308          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.sticky getter can only be called on a RegExp object"_s);
309      }
310      
311      return JSValue::encode(jsBoolean(regexp->regExp()->sticky()));
312  }
313  
314  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterUnicode, (JSGlobalObject* globalObject, CallFrame* callFrame))
315  {
316      VM& vm = globalObject->vm();
317      auto scope = DECLARE_THROW_SCOPE(vm);
318  
319      JSValue thisValue = callFrame->thisValue();
320      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
321      if (UNLIKELY(!regexp)) {
322          if (thisValue == globalObject->regExpPrototype())
323              return JSValue::encode(jsUndefined());
324          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.unicode getter can only be called on a RegExp object"_s);
325      }
326      
327      return JSValue::encode(jsBoolean(regexp->regExp()->unicode()));
328  }
329  
330  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterFlags, (JSGlobalObject* globalObject, CallFrame* callFrame))
331  {
332      VM& vm = globalObject->vm();
333      auto scope = DECLARE_THROW_SCOPE(vm);
334  
335      JSValue thisValue = callFrame->thisValue();
336      if (UNLIKELY(!thisValue.isObject()))
337          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.flags getter can only be called on an object"_s);
338  
339      auto flags = flagsString(globalObject, asObject(thisValue));
340      RETURN_IF_EXCEPTION(scope, encodedJSValue());
341  
342      return JSValue::encode(jsString(vm, flags.data()));
343  }
344  
345  template <typename CharacterType>
346  static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType);
347  
348  template <>
349  inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator)
350  {
351      if (lineTerminator == '\n')
352          builder.append('n');
353      else
354          builder.append('r');
355  }
356  
357  template <>
358  inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator)
359  {
360      if (lineTerminator == '\n')
361          builder.append('n');
362      else if (lineTerminator == '\r')
363          builder.append('r');
364      else if (lineTerminator == 0x2028)
365          builder.appendLiteral("u2028");
366      else
367          builder.appendLiteral("u2029");
368  }
369  
370  template <typename CharacterType>
371  static inline JSValue regExpProtoGetterSourceInternal(JSGlobalObject* globalObject, const String& pattern, const CharacterType* characters, unsigned length)
372  {
373      VM& vm = globalObject->vm();
374      bool previousCharacterWasBackslash = false;
375      bool inBrackets = false;
376      bool shouldEscape = false;
377  
378      // 15.10.6.4 specifies that RegExp.prototype.toString must return '/' + source + '/',
379      // and also states that the result must be a valid RegularExpressionLiteral. '//' is
380      // not a valid RegularExpressionLiteral (since it is a single line comment), and hence
381      // source cannot ever validly be "". If the source is empty, return a different Pattern
382      // that would match the same thing.
383      if (!length)
384          return jsNontrivialString(vm, "(?:)"_s);
385  
386      // early return for strings that don't contain a forwards slash and LineTerminator
387      for (unsigned i = 0; i < length; ++i) {
388          CharacterType ch = characters[i];
389          if (!previousCharacterWasBackslash) {
390              if (inBrackets) {
391                  if (ch == ']')
392                      inBrackets = false;
393              } else {
394                  if (ch == '/') {
395                      shouldEscape = true;
396                      break;
397                  }
398                  if (ch == '[')
399                      inBrackets = true;
400              }
401          }
402  
403          if (Lexer<CharacterType>::isLineTerminator(ch)) {
404              shouldEscape = true;
405              break;
406          }
407  
408          if (previousCharacterWasBackslash)
409              previousCharacterWasBackslash = false;
410          else
411              previousCharacterWasBackslash = ch == '\\';
412      }
413  
414      if (!shouldEscape)
415          return jsString(vm, pattern);
416  
417      previousCharacterWasBackslash = false;
418      inBrackets = false;
419      StringBuilder result;
420      for (unsigned i = 0; i < length; ++i) {
421          CharacterType ch = characters[i];
422          if (!previousCharacterWasBackslash) {
423              if (inBrackets) {
424                  if (ch == ']')
425                      inBrackets = false;
426              } else {
427                  if (ch == '/')
428                      result.append('\\');
429                  else if (ch == '[')
430                      inBrackets = true;
431              }
432          }
433  
434          // escape LineTerminator
435          if (Lexer<CharacterType>::isLineTerminator(ch)) {
436              if (!previousCharacterWasBackslash)
437                  result.append('\\');
438  
439              appendLineTerminatorEscape<CharacterType>(result, ch);
440          } else
441              result.append(ch);
442  
443          if (previousCharacterWasBackslash)
444              previousCharacterWasBackslash = false;
445          else
446              previousCharacterWasBackslash = ch == '\\';
447      }
448  
449      return jsString(vm, result.toString());
450  }
451  
452  JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterSource, (JSGlobalObject* globalObject, CallFrame* callFrame))
453  {
454      VM& vm = globalObject->vm();
455      auto scope = DECLARE_THROW_SCOPE(vm);
456  
457      JSValue thisValue = callFrame->thisValue();
458      auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue);
459      if (UNLIKELY(!regexp)) {
460          if (thisValue == globalObject->regExpPrototype())
461              return JSValue::encode(jsNontrivialString(vm, "(?:)"_s));
462          return throwVMTypeError(globalObject, scope, "The RegExp.prototype.source getter can only be called on a RegExp object"_s);
463      }
464  
465      String pattern = regexp->regExp()->pattern();
466      if (pattern.is8Bit())
467          return JSValue::encode(regExpProtoGetterSourceInternal(globalObject, pattern, pattern.characters8(), pattern.length()));
468      return JSValue::encode(regExpProtoGetterSourceInternal(globalObject, pattern, pattern.characters16(), pattern.length()));
469  }
470  
471  JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncSearchFast, (JSGlobalObject* globalObject, CallFrame* callFrame))
472  {
473      VM& vm = globalObject->vm();
474      auto scope = DECLARE_THROW_SCOPE(vm);
475      JSValue thisValue = callFrame->thisValue();
476      RegExp* regExp = jsCast<RegExpObject*>(thisValue)->regExp();
477  
478      JSString* string = callFrame->uncheckedArgument(0).toString(globalObject);
479      String s = string->value(globalObject);
480      RETURN_IF_EXCEPTION(scope, encodedJSValue());
481  
482      MatchResult result = globalObject->regExpGlobalData().performMatch(globalObject, regExp, string, s, 0);
483      RETURN_IF_EXCEPTION(scope, encodedJSValue());
484      return JSValue::encode(result ? jsNumber(result.start) : jsNumber(-1));
485  }
486  
487  static inline unsigned advanceStringIndex(String str, unsigned strSize, unsigned index, bool isUnicode)
488  {
489      if (!isUnicode)
490          return ++index;
491      return advanceStringUnicode(str, strSize, index);
492  }
493  
// Value returned by the control and push callbacks handed to genericSplit() to say
// whether the split loop should keep going.
enum SplitControl {
    ContinueSplit, // Keep iterating.
    AbortSplit // Stop; genericSplit() returns immediately.
};
498  
// Split loop shared by the passes in regExpProtoFuncSplitFast().
//
// Repeatedly matches |regexp| against |input| from |matchPosition| while that position
// is inside the input. For each match that makes progress it calls
//   push(true, position, matchPosition - position)   for the text before the match, then
//   push(isDefined, start, length)                    once per capture group,
// and moves |position| / |matchPosition| to the end of the match.
//
// control() is called at the top of every iteration. If either callback returns
// AbortSplit, or an exception becomes pending, the function returns right away.
// |position| and |matchPosition| are in/out parameters: on return they hold the state
// at which the loop stopped, so the caller can resume or finish the split.
template<typename ControlFunc, typename PushFunc>
void genericSplit(
    JSGlobalObject* globalObject, RegExp* regexp, const String& input, unsigned inputSize, unsigned& position,
    unsigned& matchPosition, bool regExpIsSticky, bool regExpIsUnicode,
    const ControlFunc& control, const PushFunc& push)
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);
    Vector<int> ovector;

    while (matchPosition < inputSize) {
        {
            auto result = control();
            RETURN_IF_EXCEPTION(scope, void());
            if (result == AbortSplit)
                return;
        }

        // Reuse the same vector for every match instead of constructing a new one.
        ovector.shrink(0);

        // a. Perform ? Set(splitter, "lastIndex", q, true).
        // b. Let z be ? RegExpExec(splitter, S).
        int mpos = regexp->match(globalObject, input, matchPosition, ovector);
        RETURN_IF_EXCEPTION(scope, void());

        // c. If z is null, let q be AdvanceStringIndex(S, q, unicodeMatching).
        if (mpos < 0) {
            // Only a sticky regexp retries from the next index; otherwise no match ends the loop.
            if (!regExpIsSticky)
                break;
            matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
            continue;
        }
        if (static_cast<unsigned>(mpos) >= inputSize) {
            // The spec redoes the RegExpExec starting at the next character of the input.
            // But in our case, mpos < 0 means that the native regexp already searched all permutations
            // and know that we won't be able to find a match for the separator even if we redo the
            // RegExpExec starting at the next character of the input. So, just bail.
            // NOTE(review): the text above talks about mpos < 0, but this branch is taken when the
            // match starts at or past the end of the input - confirm the intended wording.
            break;
        }

        // d. Else, z is not null
        //    i. Let e be ? ToLength(? Get(splitter, "lastIndex")).
        //   ii. Let e be min(e, size).
        matchPosition = mpos;
        unsigned matchEnd = ovector[1];

        //  iii. If e = p, let q be AdvanceStringIndex(S, q, unicodeMatching).
        if (matchEnd == position) {
            matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode);
            continue;
        }
        // if matchEnd == 0 then position should also be zero and thus matchEnd should equal position.
        ASSERT(matchEnd);

        //   iv. Else e != p,
        unsigned numberOfCaptures = regexp->numSubpatterns();

        // 1. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through q (exclusive).
        // 2. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
        {
            auto result = push(true, position, matchPosition - position);
            RETURN_IF_EXCEPTION(scope, void());
            if (result == AbortSplit)
                return;
        }

        // 5. Let p be e.
        position = matchEnd;

        // 6. Let numberOfCaptures be ? ToLength(? Get(z, "length")).
        // 7. Let numberOfCaptures be max(numberOfCaptures-1, 0).
        // 8. Let i be 1.
        // 9. Repeat, while i <= numberOfCaptures,
        for (unsigned i = 1; i <= numberOfCaptures; ++i) {
            // a. Let nextCapture be ? Get(z, ! ToString(i)).
            // b. Perform ! CreateDataProperty(A, ! ToString(lengthA), nextCapture).
            // A negative start offset is reported to push() as "not defined" via its first argument.
            int sub = ovector[i * 2];
            auto result = push(sub >= 0, sub, ovector[i * 2 + 1] - sub);
            RETURN_IF_EXCEPTION(scope, void());
            if (result == AbortSplit)
                return;
        }

        // 10. Let q be p.
        matchPosition = position;
    }
}
586  
// ES 21.2.5.11 RegExp.prototype[@@split](string, limit)
//
// Native part of the split implementation. The receiver is cast to RegExpObject without
// a dynamic check; argument 0 is the string to split and argument 1 the optional limit.
// Returns a new array holding the pieces (and capture groups) produced by the split.
//
// The work is done in up to three genericSplit() passes:
//   1. a direct pass that fills the array until it holds maxSizeForDirectPath entries;
//   2. if that cap was hit, a dry run that only counts the remaining entries and throws
//      an out-of-memory error when the total would exceed MAX_STORAGE_VECTOR_LENGTH;
//   3. a final pass, restarted from the saved cursors, that fills in the rest.
JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncSplitFast, (JSGlobalObject* globalObject, CallFrame* callFrame))
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    // 1. [handled by JS builtin] Let rx be the this value.
    // 2. [handled by JS builtin] If Type(rx) is not Object, throw a TypeError exception.
    JSValue thisValue = callFrame->thisValue();
    RegExp* regexp = jsCast<RegExpObject*>(thisValue)->regExp();

    // 3. [handled by JS builtin] Let S be ? ToString(string).
    JSString* inputString = callFrame->argument(0).toString(globalObject);
    String input = inputString->value(globalObject);
    RETURN_IF_EXCEPTION(scope, encodedJSValue());
    ASSERT(!input.isNull());

    // 4. [handled by JS builtin] Let C be ? SpeciesConstructor(rx, %RegExp%).
    // 5. [handled by JS builtin] Let flags be ? ToString(? Get(rx, "flags")).
    // 6. [handled by JS builtin] If flags contains "u", let unicodeMatching be true.
    // 7. [handled by JS builtin] Else, let unicodeMatching be false.
    // 8. [handled by JS builtin] If flags contains "y", let newFlags be flags.
    // 9. [handled by JS builtin] Else, let newFlags be the string that is the concatenation of flags and "y".
    // 10. [handled by JS builtin] Let splitter be ? Construct(C, « rx, newFlags »).

    // 11. Let A be ArrayCreate(0).
    // 12. Let lengthA be 0.
    JSArray* result = constructEmptyArray(globalObject, nullptr);
    RETURN_IF_EXCEPTION(scope, encodedJSValue());
    unsigned resultLength = 0;

    // 13. If limit is undefined, let lim be 2^32-1; else let lim be ? ToUint32(limit).
    JSValue limitValue = callFrame->argument(1);
    unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(globalObject);
    RETURN_IF_EXCEPTION(scope, encodedJSValue());

    // 14. Let size be the number of elements in S.
    unsigned inputSize = input.length();

    // 15. Let p = 0.
    unsigned position = 0;

    // 16. If lim == 0, return A.
    if (!limit)
        return JSValue::encode(result);

    // 17. If size == 0, then
    if (input.isEmpty()) {
        // a. Let z be ? RegExpExec(splitter, S).
        // b. If z is not null, return A.
        // c. Perform ! CreateDataProperty(A, "0", S).
        // d. Return A.
        auto matchResult = regexp->match(globalObject, input, 0);
        RETURN_IF_EXCEPTION(scope, encodedJSValue());
        if (!matchResult) {
            result->putDirectIndex(globalObject, 0, inputString);
            RETURN_IF_EXCEPTION(scope, encodedJSValue());
        }
        return JSValue::encode(result);
    }

    // 18. Let q = p.
    unsigned matchPosition = position;
    // 19. Repeat, while q < size
    bool regExpIsSticky = regexp->sticky();
    bool regExpIsUnicode = regexp->unicode();

    // The first pass stops once the result holds this many entries; larger results go
    // through the dry run below before any more entries are stored.
    unsigned maxSizeForDirectPath = 100000;

    // Pass 1: store entries directly, up to maxSizeForDirectPath or the limit.
    genericSplit(
        globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            if (resultLength >= maxSizeForDirectPath)
                return AbortSplit;
            return ContinueSplit;
        },
        [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
            result->putDirectIndex(globalObject, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
            RETURN_IF_EXCEPTION(scope, AbortSplit);
            if (resultLength >= limit)
                return AbortSplit;
            return ContinueSplit;
        });
    RETURN_IF_EXCEPTION(scope, encodedJSValue());

    if (resultLength >= limit)
        return JSValue::encode(result);
    if (resultLength < maxSizeForDirectPath) {
        // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive).
        // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
        scope.release();
        result->putDirectIndex(globalObject, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position));

        // 22. Return A.
        return JSValue::encode(result);
    }

    // Now do a dry run to see how big things get. Give up if they get absurd.
    // The cursors are saved because genericSplit() advances them and pass 3 has to
    // restart from where pass 1 stopped.
    unsigned savedPosition = position;
    unsigned savedMatchPosition = matchPosition;
    unsigned dryRunCount = 0;
    // Pass 2: count the remaining entries without storing anything.
    genericSplit(
        globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH)
                return AbortSplit;
            return ContinueSplit;
        },
        [&] (bool, unsigned, unsigned) -> SplitControl {
            dryRunCount++;
            if (resultLength + dryRunCount >= limit)
                return AbortSplit;
            return ContinueSplit;
        });
    RETURN_IF_EXCEPTION(scope, encodedJSValue());

    if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) {
        throwOutOfMemoryError(globalObject, scope);
        return encodedJSValue();
    }

    // OK, we know that if we finish the split, we won't have to OOM.
    position = savedPosition;
    matchPosition = savedMatchPosition;

    // Pass 3: store the remaining entries; only the limit can stop it early.
    genericSplit(
        globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode,
        [&] () -> SplitControl {
            return ContinueSplit;
        },
        [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl {
            result->putDirectIndex(globalObject, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined());
            RETURN_IF_EXCEPTION(scope, AbortSplit);
            if (resultLength >= limit)
                return AbortSplit;
            return ContinueSplit;
        });
    RETURN_IF_EXCEPTION(scope, encodedJSValue());

    if (resultLength >= limit)
        return JSValue::encode(result);

    // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive).
    // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T).
    scope.release();
    result->putDirectIndex(globalObject, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position));
    // 22. Return A.
    return JSValue::encode(result);
}
736  
737  } // namespace JSC