// testRegExp.cpp
1 /* 2 * Copyright (C) 2011-2019 Apple Inc. All rights reserved. 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Library General Public 6 * License as published by the Free Software Foundation; either 7 * version 2 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Library General Public License for more details. 13 * 14 * You should have received a copy of the GNU Library General Public License 15 * along with this library; see the file COPYING.LIB. If not, write to 16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 * Boston, MA 02110-1301, USA. 18 * 19 */ 20 21 #include "config.h" 22 #include "RegExp.h" 23 24 #include "InitializeThreading.h" 25 #include "JSCInlines.h" 26 #include "YarrFlags.h" 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <wtf/Vector.h> 31 #include <wtf/text/StringBuilder.h> 32 33 #if COMPILER(MSVC) 34 #include <crtdbg.h> 35 #include <mmsystem.h> 36 #include <windows.h> 37 #endif 38 39 const int MaxLineLength = 100 * 1024; 40 41 using namespace JSC; 42 43 struct CommandLine { 44 CommandLine() 45 : interactive(false) 46 , verbose(false) 47 { 48 } 49 50 bool interactive; 51 bool verbose; 52 Vector<String> arguments; 53 Vector<String> files; 54 }; 55 56 class StopWatch { 57 public: 58 void start(); 59 void stop(); 60 long getElapsedMS(); // call stop() first 61 62 private: 63 MonotonicTime m_startTime; 64 MonotonicTime m_stopTime; 65 }; 66 67 void StopWatch::start() 68 { 69 m_startTime = MonotonicTime::now(); 70 } 71 72 void StopWatch::stop() 73 { 74 m_stopTime = MonotonicTime::now(); 75 } 76 77 long StopWatch::getElapsedMS() 78 { 79 return (m_stopTime - m_startTime).millisecondsAs<long>(); 80 } 81 82 struct 
RegExpTest { 83 RegExpTest() 84 : offset(0) 85 , result(0) 86 { 87 } 88 89 String subject; 90 int offset; 91 int result; 92 Vector<int, 32> expectVector; 93 }; 94 95 class GlobalObject final : public JSGlobalObject { 96 public: 97 using Base = JSGlobalObject; 98 99 static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments) 100 { 101 GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments); 102 return globalObject; 103 } 104 105 DECLARE_INFO; 106 107 static constexpr bool needsDestructor = true; 108 109 static Structure* createStructure(VM& vm, JSValue prototype) 110 { 111 return Structure::create(vm, nullptr, prototype, TypeInfo(GlobalObjectType, StructureFlags), info()); 112 } 113 114 private: 115 GlobalObject(VM&, Structure*, const Vector<String>& arguments); 116 117 void finishCreation(VM& vm, const Vector<String>& arguments) 118 { 119 Base::finishCreation(vm); 120 UNUSED_PARAM(arguments); 121 } 122 }; 123 STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(GlobalObject, JSGlobalObject); 124 125 const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) }; 126 127 GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments) 128 : JSGlobalObject(vm, structure) 129 { 130 finishCreation(vm, arguments); 131 } 132 133 // Use SEH for Release builds only to get rid of the crash report dialog 134 // (luckily the same tests fail in Release and Debug builds so far). Need to 135 // be in a separate main function because the realMain function requires object 136 // unwinding. 
137 138 #if COMPILER(MSVC) && !defined(_DEBUG) 139 #define TRY __try { 140 #define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; } 141 #else 142 #define TRY 143 #define EXCEPT(x) 144 #endif 145 146 int realMain(int argc, char** argv); 147 148 int main(int argc, char** argv) 149 { 150 #if OS(WINDOWS) 151 // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for 152 // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the 153 // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>. 154 ::SetErrorMode(0); 155 156 #if defined(_DEBUG) 157 _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR); 158 _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE); 159 _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR); 160 _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE); 161 _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR); 162 _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE); 163 #endif 164 165 timeBeginPeriod(1); 166 #endif 167 168 JSC::initialize(); 169 170 // We can't use destructors in the following code because it uses Windows 171 // Structured Exception Handling 172 int res = 0; 173 TRY 174 res = realMain(argc, argv); 175 EXCEPT(res = 3) 176 return res; 177 } 178 179 static bool testOneRegExp(JSGlobalObject* globalObject, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned lineNumber) 180 { 181 bool result = true; 182 Vector<int> outVector; 183 outVector.resize(regExpTest->expectVector.size()); 184 int matchResult = regexp->match(globalObject, regExpTest->subject, regExpTest->offset, outVector); 185 186 if (matchResult != regExpTest->result) { 187 result = false; 188 if (verbose) 189 printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult); 190 } else if (matchResult != -1) { 191 if (outVector.size() != regExpTest->expectVector.size()) { 192 result = false; 193 if (verbose) { 194 #if OS(WINDOWS) 195 printf("Line %d: 
output vector size mismatch - expected %Iu got %Iu\n", lineNumber, regExpTest->expectVector.size(), outVector.size()); 196 #else 197 printf("Line %d: output vector size mismatch - expected %zu got %zu\n", lineNumber, regExpTest->expectVector.size(), outVector.size()); 198 #endif 199 } 200 } else if (outVector.size() % 2) { 201 result = false; 202 if (verbose) { 203 #if OS(WINDOWS) 204 printf("Line %d: output vector size is odd (%Iu), should be even\n", lineNumber, outVector.size()); 205 #else 206 printf("Line %d: output vector size is odd (%zu), should be even\n", lineNumber, outVector.size()); 207 #endif 208 } 209 } else { 210 // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter. 211 size_t pairCount = outVector.size() / 2; 212 for (size_t i = 0; i < pairCount; ++i) { 213 size_t startIndex = i*2; 214 if (outVector[startIndex] != regExpTest->expectVector[startIndex]) { 215 result = false; 216 if (verbose) { 217 #if OS(WINDOWS) 218 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]); 219 #else 220 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]); 221 #endif 222 } 223 } 224 if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) { 225 result = false; 226 if (verbose) { 227 #if OS(WINDOWS) 228 printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]); 229 #else 230 printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]); 231 #endif 232 } 233 } 234 } 235 } 236 } 237 238 return result; 239 } 
240 241 static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar) 242 { 243 bool escape = false; 244 245 for (int i = 0; i < bufferLength; ++i) { 246 UChar c = buffer[i]; 247 248 if (escape) { 249 switch (c) { 250 case '0': 251 c = '\0'; 252 break; 253 case 'a': 254 c = '\a'; 255 break; 256 case 'b': 257 c = '\b'; 258 break; 259 case 'f': 260 c = '\f'; 261 break; 262 case 'n': 263 c = '\n'; 264 break; 265 case 'r': 266 c = '\r'; 267 break; 268 case 't': 269 c = '\t'; 270 break; 271 case 'v': 272 c = '\v'; 273 break; 274 case '\\': 275 c = '\\'; 276 break; 277 case '?': 278 c = '\?'; 279 break; 280 case 'u': 281 if ((i + 4) >= bufferLength) 282 return -1; 283 unsigned int charValue; 284 if (sscanf(buffer+i+1, "%04x", &charValue) != 1) 285 return -1; 286 c = static_cast<UChar>(charValue); 287 i += 4; 288 break; 289 } 290 291 builder.append(c); 292 escape = false; 293 } else { 294 if (c == termChar) 295 return i; 296 297 if (c == '\\') 298 escape = true; 299 else 300 builder.append(c); 301 } 302 } 303 304 return -1; 305 } 306 307 static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError) 308 { 309 StringBuilder pattern; 310 311 if (line[0] != '/') 312 return nullptr; 313 314 int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1; 315 316 if ((i >= lineLength) || (line[i] != '/')) 317 return nullptr; 318 319 ++i; 320 321 auto flags = Yarr::parseFlags(line + i); 322 if (!flags) { 323 *regexpError = Yarr::errorMessage(Yarr::ErrorCode::InvalidRegularExpressionFlags); 324 return nullptr; 325 } 326 327 RegExp* r = RegExp::create(vm, pattern.toString(), flags.value()); 328 if (!r->isValid()) { 329 *regexpError = r->errorMessage(); 330 return nullptr; 331 } 332 333 return r; 334 } 335 336 static RegExpTest* parseTestLine(char* line, int lineLength) 337 { 338 StringBuilder subjectString; 339 340 if ((line[0] != ' ') || (line[1] != '"')) 341 return nullptr; 342 343 int i = scanString(line + 2, 
lineLength - 2, subjectString, '"') + 2; 344 345 if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' ')) 346 return nullptr; 347 348 i += 3; 349 350 int offset; 351 352 if (sscanf(line + i, "%d, ", &offset) != 1) 353 return nullptr; 354 355 while (line[i] && line[i] != ' ') 356 ++i; 357 358 ++i; 359 360 int matchResult; 361 362 if (sscanf(line + i, "%d, ", &matchResult) != 1) 363 return nullptr; 364 365 while (line[i] && line[i] != ' ') 366 ++i; 367 368 ++i; 369 370 if (line[i++] != '(') 371 return nullptr; 372 373 int start, end; 374 375 RegExpTest* result = new RegExpTest(); 376 377 result->subject = subjectString.toString(); 378 result->offset = offset; 379 result->result = matchResult; 380 381 while (line[i] && line[i] != ')') { 382 if (sscanf(line + i, "%d, %d", &start, &end) != 2) { 383 delete result; 384 return nullptr; 385 } 386 387 result->expectVector.append(start); 388 result->expectVector.append(end); 389 390 while (line[i] && (line[i] != ',') && (line[i] != ')')) 391 i++; 392 i++; 393 while (line[i] && (line[i] != ',') && (line[i] != ')')) 394 i++; 395 396 if (line[i] == ')') 397 break; 398 if (!line[i] || (line[i] != ',')) { 399 delete result; 400 return nullptr; 401 } 402 i++; 403 } 404 405 return result; 406 } 407 408 static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose) 409 { 410 String script; 411 String fileName; 412 Vector<char> scriptBuffer; 413 unsigned tests = 0; 414 unsigned failures = 0; 415 Vector<char> lineBuffer(MaxLineLength + 1); 416 417 VM& vm = globalObject->vm(); 418 419 bool success = true; 420 for (size_t i = 0; i < files.size(); i++) { 421 FILE* testCasesFile = fopen(files[i].utf8().data(), "rb"); 422 423 if (!testCasesFile) { 424 printf("Unable to open test data file \"%s\"\n", files[i].utf8().data()); 425 continue; 426 } 427 428 RegExp* regexp = nullptr; 429 size_t lineLength = 0; 430 char* linePtr = nullptr; 431 unsigned int lineNumber = 0; 432 
const char* regexpError = nullptr; 433 434 while ((linePtr = fgets(lineBuffer.data(), MaxLineLength, testCasesFile))) { 435 lineLength = strlen(linePtr); 436 if (linePtr[lineLength - 1] == '\n') { 437 linePtr[lineLength - 1] = '\0'; 438 --lineLength; 439 } 440 ++lineNumber; 441 442 if (linePtr[0] == '#') 443 continue; 444 445 if (linePtr[0] == '/') { 446 regexp = parseRegExpLine(vm, linePtr, lineLength, ®expError); 447 if (!regexp) { 448 failures++; 449 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError); 450 } 451 } else if (linePtr[0] == ' ') { 452 RegExpTest* regExpTest = parseTestLine(linePtr, lineLength); 453 454 if (regexp && regExpTest) { 455 ++tests; 456 if (!testOneRegExp(globalObject, regexp, regExpTest, verbose, lineNumber)) { 457 failures++; 458 printf("Failure on line %u\n", lineNumber); 459 } 460 } 461 462 if (regExpTest) 463 delete regExpTest; 464 } else if (linePtr[0] == '-') { 465 tests++; 466 regexp = nullptr; // Reset the live regexp to avoid confusing other subsequent tests 467 bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, ®expError); 468 if (successfullyParsed) { 469 failures++; 470 fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError); 471 } 472 } 473 } 474 475 fclose(testCasesFile); 476 } 477 478 if (failures) 479 printf("%u tests run, %u failures\n", tests, failures); 480 else 481 printf("%u tests passed\n", tests); 482 483 #if ENABLE(REGEXP_TRACING) 484 vm.dumpRegExpTrace(); 485 #endif 486 return success; 487 } 488 489 #define RUNNING_FROM_XCODE 0 490 491 static NO_RETURN void printUsageStatement(bool help = false) 492 { 493 fprintf(stderr, "Usage: regexp_test [options] file\n"); 494 fprintf(stderr, " -h|--help Prints this help message\n"); 495 fprintf(stderr, " -v|--verbose Verbose output\n"); 496 497 exit(help ? 
EXIT_SUCCESS : EXIT_FAILURE); 498 } 499 500 static void parseArguments(int argc, char** argv, CommandLine& options) 501 { 502 int i = 1; 503 for (; i < argc; ++i) { 504 const char* arg = argv[i]; 505 if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) 506 printUsageStatement(true); 507 if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose")) 508 options.verbose = true; 509 else 510 options.files.append(argv[i]); 511 } 512 513 for (; i < argc; ++i) 514 options.arguments.append(argv[i]); 515 } 516 517 int realMain(int argc, char** argv) 518 { 519 VM* vm = &VM::create(LargeHeap).leakRef(); 520 JSLockHolder locker(vm); 521 522 CommandLine options; 523 parseArguments(argc, argv, options); 524 525 GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments); 526 bool success = runFromFiles(globalObject, options.files, options.verbose); 527 528 return success ? 0 : 3; 529 } 530 531 #if OS(WINDOWS) 532 extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[]) 533 { 534 return main(argc, const_cast<char**>(argv)); 535 } 536 #endif