/ KeywordLookupGenerator.py
KeywordLookupGenerator.py
  1  # Copyright (C) 2011-2019 Apple Inc. All rights reserved.
  2  # Copyright (C) 2012 Sony Network Entertainment. All rights reserved.
  3  #
  4  # Redistribution and use in source and binary forms, with or without
  5  # modification, are permitted provided that the following conditions
  6  # are met:
  7  # 1. Redistributions of source code must retain the above copyright
  8  #    notice, this list of conditions and the following disclaimer.
  9  # 2. Redistributions in binary form must reproduce the above copyright
 10  #    notice, this list of conditions and the following disclaimer in the
 11  #    documentation and/or other materials provided with the distribution.
 12  #
 13  # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 14  # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 15  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 16  # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 17  # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 18  # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 19  # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 20  # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 21  # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 22  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 23  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24  
 25  import sys
 26  import string
 27  import operator
 28  
 29  keywordsText = open(sys.argv[1]).read()
 30  
 31  # A second argument signifies that the output
 32  # should be redirected to a file
 33  redirect_to_file = len(sys.argv) > 2
 34  
 35  # Change stdout to point to the file if requested
 36  if redirect_to_file:
 37      file_output = open(sys.argv[-1], "w")
 38      sys.stdout = file_output
 39  
 40  # Observed weights of the most common keywords, rounded to 2.s.d
 41  keyWordWeights = {
 42      "catch": 0.01,
 43      "try": 0.01,
 44      "while": 0.01,
 45      "case": 0.01,
 46      "break": 0.01,
 47      "new": 0.01,
 48      "in": 0.01,
 49      "typeof": 0.02,
 50      "true": 0.02,
 51      "false": 0.02,
 52      "for": 0.03,
 53      "null": 0.03,
 54      "else": 0.03,
 55      "return": 0.13,
 56      "var": 0.13,
 57      "if": 0.16,
 58      "function": 0.18,
 59      "this": 0.18,
 60  }
 61  
 62  
 63  def allWhitespace(str):
 64      for c in str:
 65          if not(c in string.whitespace):
 66              return False
 67      return True
 68  
 69  
 70  def parseKeywords(keywordsText):
 71  
 72      if sys.platform == "cygwin":
 73          keywordsText = keywordsText.replace("\r\n", "\n")
 74  
 75      lines = keywordsText.split("\n")
 76      lines = [line.split("#")[0] for line in lines]
 77      lines = [line for line in lines if (not allWhitespace(line))]
 78      name = lines[0].split()
 79      terminator = lines[-1]
 80  
 81      if not name[0] == "@begin":
 82          raise Exception("expected description beginning with @begin")
 83      if not terminator == "@end":
 84          raise Exception("expected description ending with @end")
 85  
 86      lines = lines[1:-1]  # trim off the old heading
 87      return [line.split() for line in lines]
 88  
 89  
 90  def makePadding(size):
 91      str = ""
 92      for i in range(size):
 93          str = str + " "
 94      return str
 95  
 96  
 97  class Trie:
 98      def __init__(self, prefix):
 99          self.prefix = prefix
100          self.keys = {}
101          self.value = None
102  
103      def insert(self, key, value):
104          if len(key) == 0:
105              self.value = value
106              return
107          if not (key[0] in self.keys):
108              self.keys[key[0]] = Trie(key[0])
109          self.keys[key[0]].insert(key[1:], value)
110  
111      def coalesce(self):
112          keys = {}
113          for k, v in self.keys.items():
114              t = v.coalesce()
115              keys[t.prefix] = t
116          self.keys = keys
117          if self.value != None:
118              return self
119          if len(self.keys) != 1:
120              return self
121          # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline.
122          for (prefix, suffix) in self.keys.items():
123              res = Trie(self.prefix + prefix)
124              res.value = suffix.value
125              res.keys = suffix.keys
126              return res
127  
128      def fillOut(self, prefix=""):
129          self.fullPrefix = prefix + self.prefix
130          weight = 0
131          if self.fullPrefix in keyWordWeights:
132              weight = weight + keyWordWeights[self.fullPrefix]
133          self.selfWeight = weight
134          for trie in self.keys.values():
135              trie.fillOut(self.fullPrefix)
136              weight = weight + trie.weight
137          self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)]
138          self.weight = weight
139  
140      def printSubTreeAsC(self, typeName, indent):
141          str = makePadding(indent)
142  
143          if self.value != None:
144              print(str + "if (LIKELY(cannotBeIdentPartOrEscapeStart(code[%d]))) {" % (len(self.fullPrefix)))
145              print(str + "    internalShift<%d>();" % len(self.fullPrefix))
146              print(str + "    if (shouldCreateIdentifier)")
147              print(str + ("        data->ident = &m_vm.propertyNames->%sKeyword;" % self.fullPrefix))
148              print(str + "    return " + self.value + ";")
149              print(str + "}")
150          rootIndex = len(self.fullPrefix)
151          itemCount = 0
152          for k, trie in self.keys:
153              baseIndex = rootIndex
154              if (baseIndex > 0) and (len(k) == 3):
155                  baseIndex = baseIndex - 1
156                  k = trie.fullPrefix[baseIndex] + k
157              test = [("'%s'" % c) for c in k]
158              if len(test) == 1:
159                  comparison = "code[%d] == %s" % (baseIndex, test[0])
160              else:
161                  base = "code"
162                  if baseIndex > 0:
163                      base = "code + %d" % baseIndex
164                  comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")"
165              if itemCount == 0:
166                  print(str + "if (" + comparison + ") {")
167              else:
168                  print(str + "} else if (" + comparison + ") {")
169  
170              trie.printSubTreeAsC(typeName, indent + 4)
171              itemCount = itemCount + 1
172  
173              if itemCount == len(self.keys):
174                  print(str + "}")
175  
176      def maxLength(self):
177          max = len(self.fullPrefix)
178          for (_, trie) in self.keys:
179              l = trie.maxLength()
180              if l > max:
181                  max = l
182          return max
183  
184      def printAsC(self):
185          print("namespace JSC {")
186          print("")
187          print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar);")
188          print("static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar);")
189          # max length + 1 so we don't need to do any bounds checking at all
190          print("static constexpr int maxTokenLength = %d;" % (self.maxLength() + 1))
191          print("")
192          print("template <>")
193          print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)")
194          print("{")
195          print("    ASSERT(m_codeEnd - m_code >= maxTokenLength);")
196          print("")
197          print("    const UChar* code = m_code;")
198          self.printSubTreeAsC("UCHAR", 4)
199          print("    return IDENT;")
200          print("}")
201          print("")
202          print("template <>")
203          print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)")
204          print("{")
205          print("    ASSERT(m_codeEnd - m_code >= maxTokenLength);")
206          print("")
207          print("    const LChar* code = m_code;")
208          self.printSubTreeAsC("CHAR", 4)
209          print("    return IDENT;")
210          print("}")
211          print("")
212          print("} // namespace JSC")
213  
214  keywords = parseKeywords(keywordsText)
215  trie = Trie("")
216  for k, v in keywords:
217      trie.insert(k, v)
218  trie.coalesce()
219  trie.fillOut()
220  print("// This file was generated by KeywordLookupGenerator.py.  Do not edit.")
221  print("""
222  #if CPU(NEEDS_ALIGNED_ACCESS)
223  
224  #define COMPARE_2CHARS(address, char1, char2) \\
225      (((address)[0] == char1) && ((address)[1] == char2))
226  #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
227      (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
228  #define COMPARE_2UCHARS(address, char1, char2) \\
229      (((address)[0] == char1) && ((address)[1] == char2))
230  #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
231      (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
232  
233  #else // CPU(NEEDS_ALIGNED_ACCESS)
234  
235  #if CPU(BIG_ENDIAN)
236  
237  #define CHARPAIR_TOUINT16(a, b) \\
238      ((((uint16_t)(a)) << 8) + (uint16_t)(b))
239  #define CHARQUAD_TOUINT32(a, b, c, d) \\
240      ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d))
241  #define UCHARPAIR_TOUINT32(a, b) \\
242      ((((uint32_t)(a)) << 16) + (uint32_t)(b))
243  #define UCHARQUAD_TOUINT64(a, b, c, d) \\
244      ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d))
245  
246  #else // CPU(BIG_ENDIAN)
247  
248  #define CHARPAIR_TOUINT16(a, b) \\
249      ((((uint16_t)(b)) << 8) + (uint16_t)(a))
250  #define CHARQUAD_TOUINT32(a, b, c, d) \\
251      ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b))
252  #define UCHARPAIR_TOUINT32(a, b) \\
253      ((((uint32_t)(b)) << 16) + (uint32_t)(a))
254  #define UCHARQUAD_TOUINT64(a, b, c, d) \\
255      ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b))
256  
257  #endif // CPU(BIG_ENDIAN)
258  
259  
260  #define COMPARE_2CHARS(address, char1, char2) \\
261      ((reinterpret_cast<const uint16_t*>(address))[0] == CHARPAIR_TOUINT16(char1, char2))
262  #define COMPARE_2UCHARS(address, char1, char2) \\
263      ((reinterpret_cast<const uint32_t*>(address))[0] == UCHARPAIR_TOUINT32(char1, char2))
264  
265  #if CPU(X86_64)
266  
267  #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
268      ((reinterpret_cast<const uint32_t*>(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4))
269  #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
270      ((reinterpret_cast<const uint64_t*>(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4))
271  
272  #else // CPU(X86_64)
273  
274  #define COMPARE_4CHARS(address, char1, char2, char3, char4) \\
275      (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4))
276  #define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\
277      (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4))
278  
279  #endif // CPU(X86_64)
280  
281  #endif // CPU(NEEDS_ALIGNED_ACCESS)
282  
283  #define COMPARE_3CHARS(address, char1, char2, char3) \\
284      (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3)))
285  #define COMPARE_3UCHARS(address, char1, char2, char3) \\
286      (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3)))
287  #define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\
288      (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
289  #define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\
290      (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5)))
291  #define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\
292      (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6))
293  #define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\
294      (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6))
295  #define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
296      (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7))
297  #define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\
298      (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7))
299  #define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
300      (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8))
301  #define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\
302      (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8))
303  """)
304  
305  trie.printAsC()
306  
307  # Close the redirected file if requested
308  if (redirect_to_file):
309      file_output.close()
310      sys.stdout = sys.__stdout__