RegExpPrototype.cpp
1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 3 * Copyright (C) 2003-2020 Apple Inc. All Rights Reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public 16 * License along with this library; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 */ 20 21 #include "config.h" 22 #include "RegExpPrototype.h" 23 24 #include "IntegrityInlines.h" 25 #include "JSArray.h" 26 #include "JSCBuiltins.h" 27 #include "JSCJSValue.h" 28 #include "JSGlobalObject.h" 29 #include "JSStringInlines.h" 30 #include "Lexer.h" 31 #include "RegExpObject.h" 32 #include "RegExpObjectInlines.h" 33 #include "StringRecursionChecker.h" 34 #include "YarrFlags.h" 35 #include <wtf/text/StringBuilder.h> 36 37 namespace JSC { 38 39 static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncExec); 40 static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncCompile); 41 static JSC_DECLARE_HOST_FUNCTION(regExpProtoFuncToString); 42 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterGlobal); 43 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterIgnoreCase); 44 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterMultiline); 45 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterDotAll); 46 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterSticky); 47 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterUnicode); 48 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterSource); 49 static JSC_DECLARE_HOST_FUNCTION(regExpProtoGetterFlags); 50 51 const ClassInfo RegExpPrototype::s_info = { "Object", &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(RegExpPrototype) }; 52 53 RegExpPrototype::RegExpPrototype(VM& vm, Structure* structure) 54 : JSNonFinalObject(vm, structure) 55 { 56 } 57 58 void RegExpPrototype::finishCreation(VM& vm, JSGlobalObject* globalObject) 59 { 60 Base::finishCreation(vm); 61 ASSERT(inherits(vm, info())); 62 JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->compile, regExpProtoFuncCompile, static_cast<unsigned>(PropertyAttribute::DontEnum), 2); 63 JSC_NATIVE_INTRINSIC_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->exec, regExpProtoFuncExec, static_cast<unsigned>(PropertyAttribute::DontEnum), 1, RegExpExecIntrinsic); 64 JSC_NATIVE_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->toString, regExpProtoFuncToString, static_cast<unsigned>(PropertyAttribute::DontEnum), 0); 65 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->global, regExpProtoGetterGlobal, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 66 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->dotAll, regExpProtoGetterDotAll, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 67 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->ignoreCase, regExpProtoGetterIgnoreCase, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 68 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->multiline, regExpProtoGetterMultiline, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 69 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->sticky, regExpProtoGetterSticky, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 70 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->unicode, regExpProtoGetterUnicode, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 71 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->source, regExpProtoGetterSource, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 72 JSC_NATIVE_GETTER_WITHOUT_TRANSITION(vm.propertyNames->flags, regExpProtoGetterFlags, PropertyAttribute::DontEnum | PropertyAttribute::Accessor); 73 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchSymbol, regExpPrototypeMatchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 74 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->matchAllSymbol, regExpPrototypeMatchAllCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 75 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->replaceSymbol, regExpPrototypeReplaceCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 76 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->searchSymbol, regExpPrototypeSearchCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 77 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->splitSymbol, regExpPrototypeSplitCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 78 JSC_BUILTIN_FUNCTION_WITHOUT_TRANSITION(vm.propertyNames->test, regExpPrototypeTestCodeGenerator, static_cast<unsigned>(PropertyAttribute::DontEnum)); 79 } 80 81 // ------------------------------ Functions --------------------------- 82 83 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncTestFast, (JSGlobalObject* globalObject, CallFrame* callFrame)) 84 { 85 VM& vm = globalObject->vm(); 86 auto scope = DECLARE_THROW_SCOPE(vm); 87 88 JSValue thisValue = callFrame->thisValue(); 89 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 90 if (UNLIKELY(!regexp)) 91 return throwVMTypeError(globalObject, scope); 92 JSString* string = callFrame->argument(0).toStringOrNull(globalObject); 93 EXCEPTION_ASSERT(!!scope.exception() == !string); 94 if (!string) 95 return JSValue::encode(jsUndefined()); 96 RELEASE_AND_RETURN(scope, JSValue::encode(jsBoolean(regexp->test(globalObject, string)))); 97 } 98 99 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncExec, (JSGlobalObject* globalObject, CallFrame* callFrame)) 100 { 101 VM& vm = globalObject->vm(); 102 auto scope = DECLARE_THROW_SCOPE(vm); 103 104 JSValue thisValue = callFrame->thisValue(); 105 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 106 if (UNLIKELY(!regexp)) 107 return throwVMTypeError(globalObject, scope, "Builtin RegExp exec can only be called on a RegExp object"); 108 JSString* string = callFrame->argument(0).toStringOrNull(globalObject); 109 EXCEPTION_ASSERT(!!scope.exception() == !string); 110 if (!string) 111 return JSValue::encode(jsUndefined()); 112 RELEASE_AND_RETURN(scope, JSValue::encode(regexp->exec(globalObject, string))); 113 } 114 115 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncMatchFast, (JSGlobalObject* globalObject, CallFrame* callFrame)) 116 { 117 RegExpObject* thisObject = jsCast<RegExpObject*>(callFrame->thisValue()); 118 JSString* string = jsCast<JSString*>(callFrame->uncheckedArgument(0)); 119 if (!thisObject->regExp()->global()) 120 return JSValue::encode(thisObject->exec(globalObject, string)); 121 return JSValue::encode(thisObject->matchGlobal(globalObject, string)); 122 } 123 124 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncCompile, (JSGlobalObject* globalObject, CallFrame* callFrame)) 125 { 126 VM& vm = globalObject->vm(); 127 auto scope = DECLARE_THROW_SCOPE(vm); 128 129 JSValue thisValue = callFrame->thisValue(); 130 auto* thisRegExp = jsDynamicCast<RegExpObject*>(vm, thisValue); 131 if (UNLIKELY(!thisRegExp)) 132 return throwVMTypeError(globalObject, scope); 133 134 RegExp* regExp; 135 JSValue arg0 = callFrame->argument(0); 136 JSValue arg1 = callFrame->argument(1); 137 138 if (auto* regExpObject = jsDynamicCast<RegExpObject*>(vm, arg0)) { 139 if (!arg1.isUndefined()) 140 return throwVMTypeError(globalObject, scope, "Cannot supply flags when constructing one RegExp from another."_s); 141 regExp = regExpObject->regExp(); 142 } else { 143 String pattern = arg0.isUndefined() ? emptyString() : arg0.toWTFString(globalObject); 144 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 145 146 auto flags = arg1.isUndefined() ? makeOptional(OptionSet<Yarr::Flags> { }) : Yarr::parseFlags(arg1.toWTFString(globalObject)); 147 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 148 if (!flags) 149 return throwVMError(globalObject, scope, createSyntaxError(globalObject, "Invalid flags supplied to RegExp constructor."_s)); 150 151 regExp = RegExp::create(vm, pattern, flags.value()); 152 } 153 154 if (!regExp->isValid()) 155 return throwVMError(globalObject, scope, regExp->errorToThrow(globalObject)); 156 157 thisRegExp->setRegExp(vm, regExp); 158 scope.release(); 159 thisRegExp->setLastIndex(globalObject, 0); 160 return JSValue::encode(thisRegExp); 161 } 162 163 typedef std::array<char, 6 + 1> FlagsString; // 6 different flags and a null character terminator. 164 165 static inline FlagsString flagsString(JSGlobalObject* globalObject, JSObject* regexp) 166 { 167 FlagsString string; 168 string[0] = 0; 169 170 VM& vm = globalObject->vm(); 171 auto scope = DECLARE_THROW_SCOPE(vm); 172 173 JSValue globalValue = regexp->get(globalObject, vm.propertyNames->global); 174 RETURN_IF_EXCEPTION(scope, string); 175 JSValue ignoreCaseValue = regexp->get(globalObject, vm.propertyNames->ignoreCase); 176 RETURN_IF_EXCEPTION(scope, string); 177 JSValue multilineValue = regexp->get(globalObject, vm.propertyNames->multiline); 178 RETURN_IF_EXCEPTION(scope, string); 179 JSValue dotAllValue = regexp->get(globalObject, vm.propertyNames->dotAll); 180 RETURN_IF_EXCEPTION(scope, string); 181 JSValue unicodeValue = regexp->get(globalObject, vm.propertyNames->unicode); 182 RETURN_IF_EXCEPTION(scope, string); 183 JSValue stickyValue = regexp->get(globalObject, vm.propertyNames->sticky); 184 RETURN_IF_EXCEPTION(scope, string); 185 186 unsigned index = 0; 187 if (globalValue.toBoolean(globalObject)) 188 string[index++] = 'g'; 189 if (ignoreCaseValue.toBoolean(globalObject)) 190 string[index++] = 'i'; 191 if (multilineValue.toBoolean(globalObject)) 192 string[index++] = 'm'; 193 if (dotAllValue.toBoolean(globalObject)) 194 string[index++] = 's'; 195 if (unicodeValue.toBoolean(globalObject)) 196 string[index++] = 'u'; 197 if (stickyValue.toBoolean(globalObject)) 198 string[index++] = 'y'; 199 ASSERT(index < string.size()); 200 string[index] = 0; 201 return string; 202 } 203 204 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncToString, (JSGlobalObject* globalObject, CallFrame* callFrame)) 205 { 206 VM& vm = globalObject->vm(); 207 auto scope = DECLARE_THROW_SCOPE(vm); 208 209 JSValue thisValue = callFrame->thisValue().toThis(globalObject, ECMAMode::strict()); 210 if (!thisValue.isObject()) 211 return throwVMTypeError(globalObject, scope); 212 213 JSObject* thisObject = asObject(thisValue); 214 Integrity::auditStructureID(vm, thisObject->structureID()); 215 216 StringRecursionChecker checker(globalObject, thisObject); 217 EXCEPTION_ASSERT(!scope.exception() || checker.earlyReturnValue()); 218 if (JSValue earlyReturnValue = checker.earlyReturnValue()) 219 return JSValue::encode(earlyReturnValue); 220 221 JSValue sourceValue = thisObject->get(globalObject, vm.propertyNames->source); 222 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 223 String source = sourceValue.toWTFString(globalObject); 224 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 225 226 JSValue flagsValue = thisObject->get(globalObject, vm.propertyNames->flags); 227 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 228 String flags = flagsValue.toWTFString(globalObject); 229 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 230 231 RELEASE_AND_RETURN(scope, JSValue::encode(jsMakeNontrivialString(globalObject, '/', source, '/', flags))); 232 } 233 234 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterGlobal, (JSGlobalObject* globalObject, CallFrame* callFrame)) 235 { 236 VM& vm = globalObject->vm(); 237 auto scope = DECLARE_THROW_SCOPE(vm); 238 239 JSValue thisValue = callFrame->thisValue(); 240 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 241 if (UNLIKELY(!regexp)) { 242 if (thisValue == globalObject->regExpPrototype()) 243 return JSValue::encode(jsUndefined()); 244 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.global getter can only be called on a RegExp object"_s); 245 } 246 247 return JSValue::encode(jsBoolean(regexp->regExp()->global())); 248 } 249 250 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterIgnoreCase, (JSGlobalObject* globalObject, CallFrame* callFrame)) 251 { 252 VM& vm = globalObject->vm(); 253 auto scope = DECLARE_THROW_SCOPE(vm); 254 255 JSValue thisValue = callFrame->thisValue(); 256 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 257 if (UNLIKELY(!regexp)) { 258 if (thisValue == globalObject->regExpPrototype()) 259 return JSValue::encode(jsUndefined()); 260 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.ignoreCase getter can only be called on a RegExp object"_s); 261 } 262 263 return JSValue::encode(jsBoolean(regexp->regExp()->ignoreCase())); 264 } 265 266 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterMultiline, (JSGlobalObject* globalObject, CallFrame* callFrame)) 267 { 268 VM& vm = globalObject->vm(); 269 auto scope = DECLARE_THROW_SCOPE(vm); 270 271 JSValue thisValue = callFrame->thisValue(); 272 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 273 if (UNLIKELY(!regexp)) { 274 if (thisValue == globalObject->regExpPrototype()) 275 return JSValue::encode(jsUndefined()); 276 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.multiline getter can only be called on a RegExp object"_s); 277 } 278 279 return JSValue::encode(jsBoolean(regexp->regExp()->multiline())); 280 } 281 282 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterDotAll, (JSGlobalObject* globalObject, CallFrame* callFrame)) 283 { 284 VM& vm = globalObject->vm(); 285 auto scope = DECLARE_THROW_SCOPE(vm); 286 287 JSValue thisValue = callFrame->thisValue(); 288 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 289 if (UNLIKELY(!regexp)) { 290 if (thisValue == globalObject->regExpPrototype()) 291 return JSValue::encode(jsUndefined()); 292 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.dotAll getter can only be called on a RegExp object"_s); 293 } 294 295 return JSValue::encode(jsBoolean(regexp->regExp()->dotAll())); 296 } 297 298 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterSticky, (JSGlobalObject* globalObject, CallFrame* callFrame)) 299 { 300 VM& vm = globalObject->vm(); 301 auto scope = DECLARE_THROW_SCOPE(vm); 302 303 JSValue thisValue = callFrame->thisValue(); 304 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 305 if (UNLIKELY(!regexp)) { 306 if (thisValue == globalObject->regExpPrototype()) 307 return JSValue::encode(jsUndefined()); 308 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.sticky getter can only be called on a RegExp object"_s); 309 } 310 311 return JSValue::encode(jsBoolean(regexp->regExp()->sticky())); 312 } 313 314 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterUnicode, (JSGlobalObject* globalObject, CallFrame* callFrame)) 315 { 316 VM& vm = globalObject->vm(); 317 auto scope = DECLARE_THROW_SCOPE(vm); 318 319 JSValue thisValue = callFrame->thisValue(); 320 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 321 if (UNLIKELY(!regexp)) { 322 if (thisValue == globalObject->regExpPrototype()) 323 return JSValue::encode(jsUndefined()); 324 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.unicode getter can only be called on a RegExp object"_s); 325 } 326 327 return JSValue::encode(jsBoolean(regexp->regExp()->unicode())); 328 } 329 330 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterFlags, (JSGlobalObject* globalObject, CallFrame* callFrame)) 331 { 332 VM& vm = globalObject->vm(); 333 auto scope = DECLARE_THROW_SCOPE(vm); 334 335 JSValue thisValue = callFrame->thisValue(); 336 if (UNLIKELY(!thisValue.isObject())) 337 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.flags getter can only be called on an object"_s); 338 339 auto flags = flagsString(globalObject, asObject(thisValue)); 340 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 341 342 return JSValue::encode(jsString(vm, flags.data())); 343 } 344 345 template <typename CharacterType> 346 static inline void appendLineTerminatorEscape(StringBuilder&, CharacterType); 347 348 template <> 349 inline void appendLineTerminatorEscape<LChar>(StringBuilder& builder, LChar lineTerminator) 350 { 351 if (lineTerminator == '\n') 352 builder.append('n'); 353 else 354 builder.append('r'); 355 } 356 357 template <> 358 inline void appendLineTerminatorEscape<UChar>(StringBuilder& builder, UChar lineTerminator) 359 { 360 if (lineTerminator == '\n') 361 builder.append('n'); 362 else if (lineTerminator == '\r') 363 builder.append('r'); 364 else if (lineTerminator == 0x2028) 365 builder.appendLiteral("u2028"); 366 else 367 builder.appendLiteral("u2029"); 368 } 369 370 template <typename CharacterType> 371 static inline JSValue regExpProtoGetterSourceInternal(JSGlobalObject* globalObject, const String& pattern, const CharacterType* characters, unsigned length) 372 { 373 VM& vm = globalObject->vm(); 374 bool previousCharacterWasBackslash = false; 375 bool inBrackets = false; 376 bool shouldEscape = false; 377 378 // 15.10.6.4 specifies that RegExp.prototype.toString must return '/' + source + '/', 379 // and also states that the result must be a valid RegularExpressionLiteral. '//' is 380 // not a valid RegularExpressionLiteral (since it is a single line comment), and hence 381 // source cannot ever validly be "". If the source is empty, return a different Pattern 382 // that would match the same thing. 383 if (!length) 384 return jsNontrivialString(vm, "(?:)"_s); 385 386 // early return for strings that don't contain a forwards slash and LineTerminator 387 for (unsigned i = 0; i < length; ++i) { 388 CharacterType ch = characters[i]; 389 if (!previousCharacterWasBackslash) { 390 if (inBrackets) { 391 if (ch == ']') 392 inBrackets = false; 393 } else { 394 if (ch == '/') { 395 shouldEscape = true; 396 break; 397 } 398 if (ch == '[') 399 inBrackets = true; 400 } 401 } 402 403 if (Lexer<CharacterType>::isLineTerminator(ch)) { 404 shouldEscape = true; 405 break; 406 } 407 408 if (previousCharacterWasBackslash) 409 previousCharacterWasBackslash = false; 410 else 411 previousCharacterWasBackslash = ch == '\\'; 412 } 413 414 if (!shouldEscape) 415 return jsString(vm, pattern); 416 417 previousCharacterWasBackslash = false; 418 inBrackets = false; 419 StringBuilder result; 420 for (unsigned i = 0; i < length; ++i) { 421 CharacterType ch = characters[i]; 422 if (!previousCharacterWasBackslash) { 423 if (inBrackets) { 424 if (ch == ']') 425 inBrackets = false; 426 } else { 427 if (ch == '/') 428 result.append('\\'); 429 else if (ch == '[') 430 inBrackets = true; 431 } 432 } 433 434 // escape LineTerminator 435 if (Lexer<CharacterType>::isLineTerminator(ch)) { 436 if (!previousCharacterWasBackslash) 437 result.append('\\'); 438 439 appendLineTerminatorEscape<CharacterType>(result, ch); 440 } else 441 result.append(ch); 442 443 if (previousCharacterWasBackslash) 444 previousCharacterWasBackslash = false; 445 else 446 previousCharacterWasBackslash = ch == '\\'; 447 } 448 449 return jsString(vm, result.toString()); 450 } 451 452 JSC_DEFINE_HOST_FUNCTION(regExpProtoGetterSource, (JSGlobalObject* globalObject, CallFrame* callFrame)) 453 { 454 VM& vm = globalObject->vm(); 455 auto scope = DECLARE_THROW_SCOPE(vm); 456 457 JSValue thisValue = callFrame->thisValue(); 458 auto* regexp = jsDynamicCast<RegExpObject*>(vm, thisValue); 459 if (UNLIKELY(!regexp)) { 460 if (thisValue == globalObject->regExpPrototype()) 461 return JSValue::encode(jsNontrivialString(vm, "(?:)"_s)); 462 return throwVMTypeError(globalObject, scope, "The RegExp.prototype.source getter can only be called on a RegExp object"_s); 463 } 464 465 String pattern = regexp->regExp()->pattern(); 466 if (pattern.is8Bit()) 467 return JSValue::encode(regExpProtoGetterSourceInternal(globalObject, pattern, pattern.characters8(), pattern.length())); 468 return JSValue::encode(regExpProtoGetterSourceInternal(globalObject, pattern, pattern.characters16(), pattern.length())); 469 } 470 471 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncSearchFast, (JSGlobalObject* globalObject, CallFrame* callFrame)) 472 { 473 VM& vm = globalObject->vm(); 474 auto scope = DECLARE_THROW_SCOPE(vm); 475 JSValue thisValue = callFrame->thisValue(); 476 RegExp* regExp = jsCast<RegExpObject*>(thisValue)->regExp(); 477 478 JSString* string = callFrame->uncheckedArgument(0).toString(globalObject); 479 String s = string->value(globalObject); 480 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 481 482 MatchResult result = globalObject->regExpGlobalData().performMatch(globalObject, regExp, string, s, 0); 483 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 484 return JSValue::encode(result ? jsNumber(result.start) : jsNumber(-1)); 485 } 486 487 static inline unsigned advanceStringIndex(String str, unsigned strSize, unsigned index, bool isUnicode) 488 { 489 if (!isUnicode) 490 return ++index; 491 return advanceStringUnicode(str, strSize, index); 492 } 493 494 enum SplitControl { 495 ContinueSplit, 496 AbortSplit 497 }; 498 499 template<typename ControlFunc, typename PushFunc> 500 void genericSplit( 501 JSGlobalObject* globalObject, RegExp* regexp, const String& input, unsigned inputSize, unsigned& position, 502 unsigned& matchPosition, bool regExpIsSticky, bool regExpIsUnicode, 503 const ControlFunc& control, const PushFunc& push) 504 { 505 VM& vm = globalObject->vm(); 506 auto scope = DECLARE_THROW_SCOPE(vm); 507 Vector<int> ovector; 508 509 while (matchPosition < inputSize) { 510 { 511 auto result = control(); 512 RETURN_IF_EXCEPTION(scope, void()); 513 if (result == AbortSplit) 514 return; 515 } 516 517 ovector.shrink(0); 518 519 // a. Perform ? Set(splitter, "lastIndex", q, true). 520 // b. Let z be ? RegExpExec(splitter, S). 521 int mpos = regexp->match(globalObject, input, matchPosition, ovector); 522 RETURN_IF_EXCEPTION(scope, void()); 523 524 // c. If z is null, let q be AdvanceStringIndex(S, q, unicodeMatching). 525 if (mpos < 0) { 526 if (!regExpIsSticky) 527 break; 528 matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode); 529 continue; 530 } 531 if (static_cast<unsigned>(mpos) >= inputSize) { 532 // The spec redoes the RegExpExec starting at the next character of the input. 533 // But in our case, mpos < 0 means that the native regexp already searched all permutations 534 // and know that we won't be able to find a match for the separator even if we redo the 535 // RegExpExec starting at the next character of the input. So, just bail. 536 break; 537 } 538 539 // d. Else, z is not null 540 // i. Let e be ? ToLength(? Get(splitter, "lastIndex")). 541 // ii. Let e be min(e, size). 542 matchPosition = mpos; 543 unsigned matchEnd = ovector[1]; 544 545 // iii. If e = p, let q be AdvanceStringIndex(S, q, unicodeMatching). 546 if (matchEnd == position) { 547 matchPosition = advanceStringIndex(input, inputSize, matchPosition, regExpIsUnicode); 548 continue; 549 } 550 // if matchEnd == 0 then position should also be zero and thus matchEnd should equal position. 551 ASSERT(matchEnd); 552 553 // iv. Else e != p, 554 unsigned numberOfCaptures = regexp->numSubpatterns(); 555 556 // 1. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through q (exclusive). 557 // 2. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). 558 { 559 auto result = push(true, position, matchPosition - position); 560 RETURN_IF_EXCEPTION(scope, void()); 561 if (result == AbortSplit) 562 return; 563 } 564 565 // 5. Let p be e. 566 position = matchEnd; 567 568 // 6. Let numberOfCaptures be ? ToLength(? Get(z, "length")). 569 // 7. Let numberOfCaptures be max(numberOfCaptures-1, 0). 570 // 8. Let i be 1. 571 // 9. Repeat, while i <= numberOfCaptures, 572 for (unsigned i = 1; i <= numberOfCaptures; ++i) { 573 // a. Let nextCapture be ? Get(z, ! ToString(i)). 574 // b. Perform ! CreateDataProperty(A, ! ToString(lengthA), nextCapture). 575 int sub = ovector[i * 2]; 576 auto result = push(sub >= 0, sub, ovector[i * 2 + 1] - sub); 577 RETURN_IF_EXCEPTION(scope, void()); 578 if (result == AbortSplit) 579 return; 580 } 581 582 // 10. Let q be p. 583 matchPosition = position; 584 } 585 } 586 587 // ES 21.2.5.11 RegExp.prototype[@@split](string, limit) 588 JSC_DEFINE_HOST_FUNCTION(regExpProtoFuncSplitFast, (JSGlobalObject* globalObject, CallFrame* callFrame)) 589 { 590 VM& vm = globalObject->vm(); 591 auto scope = DECLARE_THROW_SCOPE(vm); 592 593 // 1. [handled by JS builtin] Let rx be the this value. 594 // 2. [handled by JS builtin] If Type(rx) is not Object, throw a TypeError exception. 595 JSValue thisValue = callFrame->thisValue(); 596 RegExp* regexp = jsCast<RegExpObject*>(thisValue)->regExp(); 597 598 // 3. [handled by JS builtin] Let S be ? ToString(string). 599 JSString* inputString = callFrame->argument(0).toString(globalObject); 600 String input = inputString->value(globalObject); 601 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 602 ASSERT(!input.isNull()); 603 604 // 4. [handled by JS builtin] Let C be ? SpeciesConstructor(rx, %RegExp%). 605 // 5. [handled by JS builtin] Let flags be ? ToString(? Get(rx, "flags")). 606 // 6. [handled by JS builtin] If flags contains "u", let unicodeMatching be true. 607 // 7. [handled by JS builtin] Else, let unicodeMatching be false. 608 // 8. [handled by JS builtin] If flags contains "y", let newFlags be flags. 609 // 9. [handled by JS builtin] Else, let newFlags be the string that is the concatenation of flags and "y". 610 // 10. [handled by JS builtin] Let splitter be ? Construct(C, « rx, newFlags »). 611 612 // 11. Let A be ArrayCreate(0). 613 // 12. Let lengthA be 0. 614 JSArray* result = constructEmptyArray(globalObject, nullptr); 615 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 616 unsigned resultLength = 0; 617 618 // 13. If limit is undefined, let lim be 2^32-1; else let lim be ? ToUint32(limit). 619 JSValue limitValue = callFrame->argument(1); 620 unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(globalObject); 621 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 622 623 // 14. Let size be the number of elements in S. 624 unsigned inputSize = input.length(); 625 626 // 15. Let p = 0. 627 unsigned position = 0; 628 629 // 16. If lim == 0, return A. 630 if (!limit) 631 return JSValue::encode(result); 632 633 // 17. If size == 0, then 634 if (input.isEmpty()) { 635 // a. Let z be ? RegExpExec(splitter, S). 636 // b. If z is not null, return A. 637 // c. Perform ! CreateDataProperty(A, "0", S). 638 // d. Return A. 639 auto matchResult = regexp->match(globalObject, input, 0); 640 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 641 if (!matchResult) { 642 result->putDirectIndex(globalObject, 0, inputString); 643 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 644 } 645 return JSValue::encode(result); 646 } 647 648 // 18. Let q = p. 649 unsigned matchPosition = position; 650 // 19. Repeat, while q < size 651 bool regExpIsSticky = regexp->sticky(); 652 bool regExpIsUnicode = regexp->unicode(); 653 654 unsigned maxSizeForDirectPath = 100000; 655 656 genericSplit( 657 globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, 658 [&] () -> SplitControl { 659 if (resultLength >= maxSizeForDirectPath) 660 return AbortSplit; 661 return ContinueSplit; 662 }, 663 [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl { 664 result->putDirectIndex(globalObject, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined()); 665 RETURN_IF_EXCEPTION(scope, AbortSplit); 666 if (resultLength >= limit) 667 return AbortSplit; 668 return ContinueSplit; 669 }); 670 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 671 672 if (resultLength >= limit) 673 return JSValue::encode(result); 674 if (resultLength < maxSizeForDirectPath) { 675 // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive). 676 // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). 677 scope.release(); 678 result->putDirectIndex(globalObject, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position)); 679 680 // 22. Return A. 681 return JSValue::encode(result); 682 } 683 684 // Now do a dry run to see how big things get. Give up if they get absurd. 685 unsigned savedPosition = position; 686 unsigned savedMatchPosition = matchPosition; 687 unsigned dryRunCount = 0; 688 genericSplit( 689 globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, 690 [&] () -> SplitControl { 691 if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) 692 return AbortSplit; 693 return ContinueSplit; 694 }, 695 [&] (bool, unsigned, unsigned) -> SplitControl { 696 dryRunCount++; 697 if (resultLength + dryRunCount >= limit) 698 return AbortSplit; 699 return ContinueSplit; 700 }); 701 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 702 703 if (resultLength + dryRunCount > MAX_STORAGE_VECTOR_LENGTH) { 704 throwOutOfMemoryError(globalObject, scope); 705 return encodedJSValue(); 706 } 707 708 // OK, we know that if we finish the split, we won't have to OOM. 709 position = savedPosition; 710 matchPosition = savedMatchPosition; 711 712 genericSplit( 713 globalObject, regexp, input, inputSize, position, matchPosition, regExpIsSticky, regExpIsUnicode, 714 [&] () -> SplitControl { 715 return ContinueSplit; 716 }, 717 [&] (bool isDefined, unsigned start, unsigned length) -> SplitControl { 718 result->putDirectIndex(globalObject, resultLength++, isDefined ? jsSubstringOfResolved(vm, inputString, start, length) : jsUndefined()); 719 RETURN_IF_EXCEPTION(scope, AbortSplit); 720 if (resultLength >= limit) 721 return AbortSplit; 722 return ContinueSplit; 723 }); 724 RETURN_IF_EXCEPTION(scope, encodedJSValue()); 725 726 if (resultLength >= limit) 727 return JSValue::encode(result); 728 729 // 20. Let T be a String value equal to the substring of S consisting of the elements at indices p (inclusive) through size (exclusive). 730 // 21. Perform ! CreateDataProperty(A, ! ToString(lengthA), T). 731 scope.release(); 732 result->putDirectIndex(globalObject, resultLength, jsSubstringOfResolved(vm, inputString, position, inputSize - position)); 733 // 22. Return A. 734 return JSValue::encode(result); 735 } 736 737 } // namespace JSC