/ testRegExp.cpp
testRegExp.cpp
  1  /*
  2   *  Copyright (C) 2011-2019 Apple Inc. All rights reserved.
  3   *
  4   *  This library is free software; you can redistribute it and/or
  5   *  modify it under the terms of the GNU Library General Public
  6   *  License as published by the Free Software Foundation; either
  7   *  version 2 of the License, or (at your option) any later version.
  8   *
  9   *  This library is distributed in the hope that it will be useful,
 10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 12   *  Library General Public License for more details.
 13   *
 14   *  You should have received a copy of the GNU Library General Public License
 15   *  along with this library; see the file COPYING.LIB.  If not, write to
 16   *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 17   *  Boston, MA 02110-1301, USA.
 18   *
 19   */
 20  
 21  #include "config.h"
 22  #include "RegExp.h"
 23  
 24  #include "InitializeThreading.h"
 25  #include "JSCInlines.h"
 26  #include "YarrFlags.h"
 27  #include <stdio.h>
 28  #include <stdlib.h>
 29  #include <string.h>
 30  #include <wtf/Vector.h>
 31  #include <wtf/text/StringBuilder.h>
 32  
 33  #if COMPILER(MSVC)
 34  #include <crtdbg.h>
 35  #include <mmsystem.h>
 36  #include <windows.h>
 37  #endif
 38  
 39  const int MaxLineLength = 100 * 1024;
 40  
 41  using namespace JSC;
 42  
 43  struct CommandLine {
 44      CommandLine()
 45          : interactive(false)
 46          , verbose(false)
 47      {
 48      }
 49  
 50      bool interactive;
 51      bool verbose;
 52      Vector<String> arguments;
 53      Vector<String> files;
 54  };
 55  
 56  class StopWatch {
 57  public:
 58      void start();
 59      void stop();
 60      long getElapsedMS(); // call stop() first
 61  
 62  private:
 63      MonotonicTime m_startTime;
 64      MonotonicTime m_stopTime;
 65  };
 66  
 67  void StopWatch::start()
 68  {
 69      m_startTime = MonotonicTime::now();
 70  }
 71  
 72  void StopWatch::stop()
 73  {
 74      m_stopTime = MonotonicTime::now();
 75  }
 76  
 77  long StopWatch::getElapsedMS()
 78  {
 79      return (m_stopTime - m_startTime).millisecondsAs<long>();
 80  }
 81  
 82  struct RegExpTest {
 83      RegExpTest()
 84          : offset(0)
 85          , result(0)
 86      {
 87      }
 88  
 89      String subject;
 90      int offset;
 91      int result;
 92      Vector<int, 32> expectVector;
 93  };
 94  
 95  class GlobalObject final : public JSGlobalObject {
 96  public:
 97      using Base = JSGlobalObject;
 98  
 99      static GlobalObject* create(VM& vm, Structure* structure, const Vector<String>& arguments)
100      {
101          GlobalObject* globalObject = new (NotNull, allocateCell<GlobalObject>(vm.heap)) GlobalObject(vm, structure, arguments);
102          return globalObject;
103      }
104  
105      DECLARE_INFO;
106  
107      static constexpr bool needsDestructor = true;
108  
109      static Structure* createStructure(VM& vm, JSValue prototype)
110      {
111          return Structure::create(vm, nullptr, prototype, TypeInfo(GlobalObjectType, StructureFlags), info());
112      }
113  
114  private:
115      GlobalObject(VM&, Structure*, const Vector<String>& arguments);
116  
117      void finishCreation(VM& vm, const Vector<String>& arguments)
118      {
119          Base::finishCreation(vm);
120          UNUSED_PARAM(arguments);
121      }
122  };
123  STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(GlobalObject, JSGlobalObject);
124  
125  const ClassInfo GlobalObject::s_info = { "global", &JSGlobalObject::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(GlobalObject) };
126  
127  GlobalObject::GlobalObject(VM& vm, Structure* structure, const Vector<String>& arguments)
128      : JSGlobalObject(vm, structure)
129  {
130      finishCreation(vm, arguments);
131  }
132  
133  // Use SEH for Release builds only to get rid of the crash report dialog
134  // (luckily the same tests fail in Release and Debug builds so far). Need to
135  // be in a separate main function because the realMain function requires object
136  // unwinding.
137  
138  #if COMPILER(MSVC) && !defined(_DEBUG)
139  #define TRY       __try {
140  #define EXCEPT(x) } __except (EXCEPTION_EXECUTE_HANDLER) { x; }
141  #else
142  #define TRY
143  #define EXCEPT(x)
144  #endif
145  
146  int realMain(int argc, char** argv);
147  
148  int main(int argc, char** argv)
149  {
150  #if OS(WINDOWS)
151      // Cygwin calls ::SetErrorMode(SEM_FAILCRITICALERRORS), which we will inherit. This is bad for
152      // testing/debugging, as it causes the post-mortem debugger not to be invoked. We reset the
153      // error mode here to work around Cygwin's behavior. See <http://webkit.org/b/55222>.
154      ::SetErrorMode(0);
155  
156  #if defined(_DEBUG)
157      _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
158      _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
159      _CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
160      _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE);
161      _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
162      _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE);
163  #endif
164  
165      timeBeginPeriod(1);
166  #endif
167  
168      JSC::initialize();
169  
170      // We can't use destructors in the following code because it uses Windows
171      // Structured Exception Handling
172      int res = 0;
173      TRY
174          res = realMain(argc, argv);
175      EXCEPT(res = 3)
176      return res;
177  }
178  
179  static bool testOneRegExp(JSGlobalObject* globalObject, RegExp* regexp, RegExpTest* regExpTest, bool verbose, unsigned lineNumber)
180  {
181      bool result = true;
182      Vector<int> outVector;
183      outVector.resize(regExpTest->expectVector.size());
184      int matchResult = regexp->match(globalObject, regExpTest->subject, regExpTest->offset, outVector);
185  
186      if (matchResult != regExpTest->result) {
187          result = false;
188          if (verbose)
189              printf("Line %d: results mismatch - expected %d got %d\n", lineNumber, regExpTest->result, matchResult);
190      } else if (matchResult != -1) {
191          if (outVector.size() != regExpTest->expectVector.size()) {
192              result = false;
193              if (verbose) {
194  #if OS(WINDOWS)
195                  printf("Line %d: output vector size mismatch - expected %Iu got %Iu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
196  #else
197                  printf("Line %d: output vector size mismatch - expected %zu got %zu\n", lineNumber, regExpTest->expectVector.size(), outVector.size());
198  #endif
199              }
200          } else if (outVector.size() % 2) {
201              result = false;
202              if (verbose) {
203  #if OS(WINDOWS)
204                  printf("Line %d: output vector size is odd (%Iu), should be even\n", lineNumber, outVector.size());
205  #else
206                  printf("Line %d: output vector size is odd (%zu), should be even\n", lineNumber, outVector.size());
207  #endif
208              }
209          } else {
210              // Check in pairs since the first value of the pair could be -1 in which case the second doesn't matter.
211              size_t pairCount = outVector.size() / 2;
212              for (size_t i = 0; i < pairCount; ++i) {
213                  size_t startIndex = i*2;
214                  if (outVector[startIndex] != regExpTest->expectVector[startIndex]) {
215                      result = false;
216                      if (verbose) {
217  #if OS(WINDOWS)
218                          printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
219  #else
220                          printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex, regExpTest->expectVector[startIndex], outVector[startIndex]);
221  #endif
222                      }
223                  }
224                  if ((i > 0) && (regExpTest->expectVector[startIndex] != -1) && (outVector[startIndex+1] != regExpTest->expectVector[startIndex+1])) {
225                      result = false;
226                      if (verbose) {
227  #if OS(WINDOWS)
228                          printf("Line %d: output vector mismatch at index %Iu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
229  #else
230                          printf("Line %d: output vector mismatch at index %zu - expected %d got %d\n", lineNumber, startIndex + 1, regExpTest->expectVector[startIndex + 1], outVector[startIndex + 1]);
231  #endif
232                      }
233                  }
234              }
235          }
236      }
237  
238      return result;
239  }
240  
241  static int scanString(char* buffer, int bufferLength, StringBuilder& builder, char termChar)
242  {
243      bool escape = false;
244      
245      for (int i = 0; i < bufferLength; ++i) {
246          UChar c = buffer[i];
247          
248          if (escape) {
249              switch (c) {
250              case '0':
251                  c = '\0';
252                  break;
253              case 'a':
254                  c = '\a';
255                  break;
256              case 'b':
257                  c = '\b';
258                  break;
259              case 'f':
260                  c = '\f';
261                  break;
262              case 'n':
263                  c = '\n';
264                  break;
265              case 'r':
266                  c = '\r';
267                  break;
268              case 't':
269                  c = '\t';
270                  break;
271              case 'v':
272                  c = '\v';
273                  break;
274              case '\\':
275                  c = '\\';
276                  break;
277              case '?':
278                  c = '\?';
279                  break;
280              case 'u':
281                  if ((i + 4) >= bufferLength)
282                      return -1;
283                  unsigned int charValue;
284                  if (sscanf(buffer+i+1, "%04x", &charValue) != 1)
285                      return -1;
286                  c = static_cast<UChar>(charValue);
287                  i += 4;
288                  break;
289              }
290              
291              builder.append(c);
292              escape = false;
293          } else {
294              if (c == termChar)
295                  return i;
296  
297              if (c == '\\')
298                  escape = true;
299              else
300                  builder.append(c);
301          }
302      }
303  
304      return -1;
305  }
306  
307  static RegExp* parseRegExpLine(VM& vm, char* line, int lineLength, const char** regexpError)
308  {
309      StringBuilder pattern;
310  
311      if (line[0] != '/')
312          return nullptr;
313  
314      int i = scanString(line + 1, lineLength - 1, pattern, '/') + 1;
315  
316      if ((i >= lineLength) || (line[i] != '/'))
317          return nullptr;
318  
319      ++i;
320  
321      auto flags = Yarr::parseFlags(line + i);
322      if (!flags) {
323          *regexpError = Yarr::errorMessage(Yarr::ErrorCode::InvalidRegularExpressionFlags);
324          return nullptr;
325      }
326  
327      RegExp* r = RegExp::create(vm, pattern.toString(), flags.value());
328      if (!r->isValid()) {
329          *regexpError = r->errorMessage();
330          return nullptr;
331      }
332  
333      return r;
334  }
335  
336  static RegExpTest* parseTestLine(char* line, int lineLength)
337  {
338      StringBuilder subjectString;
339      
340      if ((line[0] != ' ') || (line[1] != '"'))
341          return nullptr;
342  
343      int i = scanString(line + 2, lineLength - 2, subjectString, '"') + 2;
344  
345      if ((i >= (lineLength - 2)) || (line[i] != '"') || (line[i+1] != ',') || (line[i+2] != ' '))
346          return nullptr;
347  
348      i += 3;
349      
350      int offset;
351      
352      if (sscanf(line + i, "%d, ", &offset) != 1)
353          return nullptr;
354  
355      while (line[i] && line[i] != ' ')
356          ++i;
357  
358      ++i;
359      
360      int matchResult;
361      
362      if (sscanf(line + i, "%d, ", &matchResult) != 1)
363          return nullptr;
364      
365      while (line[i] && line[i] != ' ')
366          ++i;
367      
368      ++i;
369      
370      if (line[i++] != '(')
371          return nullptr;
372  
373      int start, end;
374      
375      RegExpTest* result = new RegExpTest();
376      
377      result->subject = subjectString.toString();
378      result->offset = offset;
379      result->result = matchResult;
380  
381      while (line[i] && line[i] != ')') {
382          if (sscanf(line + i, "%d, %d", &start, &end) != 2) {
383              delete result;
384              return nullptr;
385          }
386  
387          result->expectVector.append(start);
388          result->expectVector.append(end);
389  
390          while (line[i] && (line[i] != ',') && (line[i] != ')'))
391              i++;
392          i++;
393          while (line[i] && (line[i] != ',') && (line[i] != ')'))
394              i++;
395  
396          if (line[i] == ')')
397              break;
398          if (!line[i] || (line[i] != ',')) {
399              delete result;
400              return nullptr;
401          }
402          i++;
403      }
404  
405      return result;
406  }
407  
408  static bool runFromFiles(GlobalObject* globalObject, const Vector<String>& files, bool verbose)
409  {
410      String script;
411      String fileName;
412      Vector<char> scriptBuffer;
413      unsigned tests = 0;
414      unsigned failures = 0;
415      Vector<char> lineBuffer(MaxLineLength + 1);
416  
417      VM& vm = globalObject->vm();
418  
419      bool success = true;
420      for (size_t i = 0; i < files.size(); i++) {
421          FILE* testCasesFile = fopen(files[i].utf8().data(), "rb");
422  
423          if (!testCasesFile) {
424              printf("Unable to open test data file \"%s\"\n", files[i].utf8().data());
425              continue;
426          }
427              
428          RegExp* regexp = nullptr;
429          size_t lineLength = 0;
430          char* linePtr = nullptr;
431          unsigned int lineNumber = 0;
432          const char* regexpError = nullptr;
433  
434          while ((linePtr = fgets(lineBuffer.data(), MaxLineLength, testCasesFile))) {
435              lineLength = strlen(linePtr);
436              if (linePtr[lineLength - 1] == '\n') {
437                  linePtr[lineLength - 1] = '\0';
438                  --lineLength;
439              }
440              ++lineNumber;
441  
442              if (linePtr[0] == '#')
443                  continue;
444  
445              if (linePtr[0] == '/') {
446                  regexp = parseRegExpLine(vm, linePtr, lineLength, &regexpError);
447                  if (!regexp) {
448                      failures++;
449                      fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr, regexpError);
450                  }
451              } else if (linePtr[0] == ' ') {
452                  RegExpTest* regExpTest = parseTestLine(linePtr, lineLength);
453                  
454                  if (regexp && regExpTest) {
455                      ++tests;
456                      if (!testOneRegExp(globalObject, regexp, regExpTest, verbose, lineNumber)) {
457                          failures++;
458                          printf("Failure on line %u\n", lineNumber);
459                      }
460                  }
461                  
462                  if (regExpTest)
463                      delete regExpTest;
464              } else if (linePtr[0] == '-') {
465                  tests++;
466                  regexp = nullptr; // Reset the live regexp to avoid confusing other subsequent tests
467                  bool successfullyParsed = parseRegExpLine(vm, linePtr + 1, lineLength - 1, &regexpError);
468                  if (successfullyParsed) {
469                      failures++;
470                      fprintf(stderr, "Failure on line %u. '%s' %s\n", lineNumber, linePtr + 1, regexpError);
471                  }
472              }
473          }
474          
475          fclose(testCasesFile);
476      }
477  
478      if (failures)
479          printf("%u tests run, %u failures\n", tests, failures);
480      else
481          printf("%u tests passed\n", tests);
482  
483  #if ENABLE(REGEXP_TRACING)
484      vm.dumpRegExpTrace();
485  #endif
486      return success;
487  }
488  
489  #define RUNNING_FROM_XCODE 0
490  
491  static NO_RETURN void printUsageStatement(bool help = false)
492  {
493      fprintf(stderr, "Usage: regexp_test [options] file\n");
494      fprintf(stderr, "  -h|--help  Prints this help message\n");
495      fprintf(stderr, "  -v|--verbose  Verbose output\n");
496  
497      exit(help ? EXIT_SUCCESS : EXIT_FAILURE);
498  }
499  
500  static void parseArguments(int argc, char** argv, CommandLine& options)
501  {
502      int i = 1;
503      for (; i < argc; ++i) {
504          const char* arg = argv[i];
505          if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
506              printUsageStatement(true);
507          if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose"))
508              options.verbose = true;
509          else
510              options.files.append(argv[i]);
511      }
512  
513      for (; i < argc; ++i)
514          options.arguments.append(argv[i]);
515  }
516  
517  int realMain(int argc, char** argv)
518  {
519      VM* vm = &VM::create(LargeHeap).leakRef();
520      JSLockHolder locker(vm);
521  
522      CommandLine options;
523      parseArguments(argc, argv, options);
524  
525      GlobalObject* globalObject = GlobalObject::create(*vm, GlobalObject::createStructure(*vm, jsNull()), options.arguments);
526      bool success = runFromFiles(globalObject, options.files, options.verbose);
527  
528      return success ? 0 : 3;
529  }
530  
531  #if OS(WINDOWS)
532  extern "C" __declspec(dllexport) int WINAPI dllLauncherEntryPoint(int argc, const char* argv[])
533  {
534      return main(argc, const_cast<char**>(argv));
535  }
536  #endif