/ scripts / reflib.py
reflib.py
  1  #!/usr/bin/python3
  2  #
  3  # Copyright (c) 2016-2019 The Khronos Group Inc.
  4  #
  5  # Licensed under the Apache License, Version 2.0 (the "License");
  6  # you may not use this file except in compliance with the License.
  7  # You may obtain a copy of the License at
  8  #
  9  #     http://www.apache.org/licenses/LICENSE-2.0
 10  #
 11  # Unless required by applicable law or agreed to in writing, software
 12  # distributed under the License is distributed on an "AS IS" BASIS,
 13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  # See the License for the specific language governing permissions and
 15  # limitations under the License.
 16  
 17  # Utility functions for automatic ref page generation
 18  
 19  import io
 20  import re
 21  import sys
 22  
 23  # global errFile, warnFile, diagFile
 24  
 25  errFile = sys.stderr
 26  warnFile = sys.stdout
 27  diagFile = None
 28  logSourcefile = None
 29  logProcname = None
 30  logLine = None
 31  
 32  # Remove \' escape sequences in a string (refpage description)
 33  def unescapeQuotes(s):
 34      return s.replace('\\\'', '\'')
 35  
 36  def write(*args, **kwargs ):
 37      file = kwargs.pop('file',sys.stdout)
 38      end = kwargs.pop('end','\n')
 39      file.write(' '.join(str(arg) for arg in args))
 40      file.write(end)
 41  
# Metadata which may be printed (if not None) for diagnostic messages
def setLogSourcefile(filename):
    """Set the module-level logSourcefile, which logHeader adds to log prefixes.

    filename - source file name to report, or None to omit it
    """
    global logSourcefile
    logSourcefile = filename
 46  
def setLogProcname(procname):
    """Set the module-level logProcname, which logHeader adds to log prefixes.

    procname - procedure name to report, or None to omit it
    """
    global logProcname
    logProcname = procname
 50  
def setLogLine(line):
    """Set the module-level logLine, which logHeader adds to log prefixes.

    line - line number to report, or None to omit it
    """
    global logLine
    logLine = line
 54  
 55  # Generate prefix for a diagnostic line using metadata and severity
 56  def logHeader(severity):
 57      global logSourcefile, logProcname, logLine
 58  
 59      msg = severity + ': '
 60      if logProcname:
 61          msg = msg + ' in ' + logProcname
 62      if logSourcefile:
 63          msg = msg + ' for ' + logSourcefile
 64      if logLine:
 65          msg = msg + ' line ' + str(logLine)
 66      return msg + ' '
 67  
 68  # Set the file handle to log either or both warnings and diagnostics to.
 69  # setDiag and setWarn are True if the corresponding handle is to be set.
 70  # filename is None for no logging, '-' for stdout, or a pathname.
 71  def setLogFile(setDiag, setWarn, filename):
 72      global diagFile, warnFile
 73  
 74      if filename is None:
 75          return
 76  
 77      if filename == '-':
 78          fp = sys.stdout
 79      else:
 80          fp = open(filename, 'w', encoding='utf-8')
 81  
 82      if setDiag:
 83          diagFile = fp
 84      if setWarn:
 85          warnFile = fp
 86  
 87  def logDiag(*args, **kwargs):
 88      file = kwargs.pop('file', diagFile)
 89      end = kwargs.pop('end','\n')
 90      if file is not None:
 91          file.write(logHeader('DIAG') + ' '.join(str(arg) for arg in args))
 92          file.write(end)
 93  
 94  def logWarn(*args, **kwargs):
 95      file = kwargs.pop('file', warnFile)
 96      end = kwargs.pop('end','\n')
 97      if file is not None:
 98          file.write(logHeader('WARN') + ' '.join(str(arg) for arg in args))
 99          file.write(end)
100  
def logErr(*args, **kwargs):
    """Write an 'ERROR' message built from args, then raise UserWarning.

    Keyword arguments:
        file - stream to write to (default: the module-level errFile);
               nothing is written if it is None
        end  - string appended to the message (default: newline)

    Always raises UserWarning carrying the full message text, including
    the terminator.
    """
    stream = kwargs.pop('file', errFile)
    terminator = kwargs.pop('end', '\n')

    message = logHeader('ERROR') + ' '.join(map(str, args)) + terminator

    if stream is not None:
        stream.write(message)
    raise UserWarning(message)
112  
def isempty(s):
    """Return True if s is nothing but white space (or empty), else False."""
    # split() with no separator yields no fields for a whitespace-only string
    return not s.split()
116  
class pageInfo:
    """Information about a ref page relative to the file it's extracted from.

    Attributes:
        extractPage - True if page should be extracted
        Warning - string warning if page is suboptimal or can't be generated
        embed - False or the name of the ref page this include is embedded
            within

        type - 'structs', 'protos', 'funcpointers', 'flags', 'enums'
        name - struct/proto/enumerant/etc. name
        desc - short description of ref page
        begin - index of first line of the page (heuristic or // refBegin)
        include - index of include:: line defining the page
        param - index of first line of parameter/member definitions
        body - index of first line of body text
        validity - index of validity include
        end - index of last line of the page (heuristic validity include,
            or // refEnd)
        refs - cross-references on // refEnd line, if supplied
        spec - 'spec' attribute in refpage open block, if supplied, or None
            for the default ('api') type
        anchor - 'anchor' attribute in refpage open block, if supplied, or
            inferred to be the same as the 'name'
    """

    def __init__(self):
        # Extraction status
        self.extractPage = True
        self.Warning = None
        self.embed = False

        # Page identity
        self.type = None
        self.name = None
        self.desc = None

        # Line indices into the source file; None until located
        self.begin = None
        self.include = None
        self.param = None
        self.body = None
        self.validity = None
        self.end = None

        # Optional open block attributes
        self.refs = ''
        self.spec = None
        self.anchor = None
156  
def printPageInfoField(desc, line, file):
    """Log a single line-index field of a pageInfo via logDiag.

    desc - string description of the field
    line - field value (zero-based index into file) or None
    file - list of strings indexed by line

    A non-None field is logged as its one-based line number followed by
    the text of that line; None is logged as-is.
    """
    label = desc + ':'
    if line is None:
        logDiag(label, line)
        return
    # The referenced line supplies its own line ending, hence end=''
    logDiag(label, line + 1, '\t-> ', file[line], end='')
166  
def printPageInfo(pi, file):
    """Log the fields of a pageInfo struct as diagnostics.

    pi - pageInfo to print
    file - list of strings indexed by the line fields of pi
    """
    # Plain values are logged directly
    scalars = (
        ('TYPE:   ', pi.type),
        ('NAME:   ', pi.name),
        ('WARNING:', pi.Warning),
        ('EXTRACT:', pi.extractPage),
        ('EMBED:  ', pi.embed),
        ('DESC:   ', pi.desc),
    )
    for label, value in scalars:
        logDiag(label, value)

    # Line indices are logged together with the line they refer to
    positions = (
        ('BEGIN   ', pi.begin),
        ('INCLUDE ', pi.include),
        ('PARAM   ', pi.param),
        ('BODY    ', pi.body),
        ('VALIDITY', pi.validity),
        ('END     ', pi.end),
    )
    for label, index in positions:
        printPageInfoField(label, index, file)

    logDiag('REFS: "' + pi.refs + '"')
184  
def prevPara(file, line):
    """Return the index of the first line of the paragraph before line.

    Paragraphs are delimited by blank lines. It is assumed that the
    current line is the first line of a paragraph.

    file - list of strings
    line - starting point (zero-based)

    Returns -1 if the scan runs off the start of the file before reaching
    a previous paragraph.
    """
    # Walk backwards out of the current paragraph
    while line >= 0 and not isempty(file[line]):
        line -= 1
    # Walk backwards over the blank lines separating the paragraphs
    while line >= 0 and isempty(file[line]):
        line -= 1
    # Walk back to the first line of the previous paragraph
    while line > 0 and not isempty(file[line - 1]):
        line -= 1
    return line
203  
def nextPara(file, line):
    """Return the index of the first line of the paragraph after line.

    Paragraphs are delimited by blank lines. The scan never advances past
    the last line of the file, so that index is returned if no further
    paragraph is found.

    file - list of strings
    line - starting point (zero-based)
    """
    lastLine = len(file) - 1
    # Walk forwards out of the current paragraph
    while line != lastLine and not isempty(file[line]):
        line += 1
    # Walk forwards over the blank lines that follow it
    while line != lastLine and isempty(file[line]):
        line += 1
    return line
220  
def lookupPage(pageMap, name):
    """Return the pageInfo entry in pageMap for name, creating it if needed.

    pageMap - dictionary mapping names to pageInfo structures
    name - key to look up; also stored as the name of a newly created entry
    """
    if name in pageMap:
        return pageMap[name]

    entry = pageInfo()
    entry.name = name
    pageMap[name] = entry
    return entry
230  
def loadFile(filename):
    """Load a UTF-8 text file into a list of strings, one per line.

    filename - path of the file to read

    Returns the list of lines (line endings preserved), or None if the
    file cannot be opened, in which case a warning is logged. Errors
    raised while reading an opened file are not caught here.
    """
    try:
        fp = open(filename, 'r', encoding='utf-8')
    except Exception as err:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logWarn('Cannot open file', filename, ':', type(err))
        return None

    # The with block closes the file even if readlines() raises
    with fp:
        return fp.readlines()
243  
def clampToBlock(line, minline, maxline):
    """Clamp a line number to be in the range [minline, maxline].

    line - line number to clamp; None is returned unchanged
    minline - lower bound, or None for no lower bound
    maxline - upper bound

    Returns the clamped line number.
    """
    if line is None:
        return line
    # Compare against None explicitly: 0 is a valid lower bound and must
    # not be mistaken for "no lower bound".
    if minline is not None and line < minline:
        return minline
    if line > maxline:
        return maxline

    return line
256  
def fixupRefs(pageMap, specFile, file):
    """Fill in missing pageInfo fields, to the extent they can be inferred.

    pageMap - dictionary of pageInfo structures, updated in place
    specFile - filename, used only in diagnostic messages
    file - list of strings making up the file, indexed by pageInfo
    """
    # All potential ref pages are now in pageMap. Work out each page's
    # actual start/end/description boundaries where the text did not
    # already determine them.
    #
    # NOTE: an earlier heuristic, which treated a page having an include
    # line but no begin, validity, or end as a non-extracted page beginning
    # at the include, is disabled.
    for name in sorted(pageMap):
        page = pageMap[name]

        # Using open block delimiters, ref pages must *always* have a
        # defined begin and end. If either is undefined, that's fatal.
        if page.begin is None:
            page.extractPage = False
            page.Warning = 'Can\'t identify begin of ref page open block'
            continue

        if page.end is None:
            page.extractPage = False
            page.Warning = 'Can\'t identify end of ref page open block'
            continue

        # A page with neither a description nor a type cannot be extracted;
        # one with only a type is flagged but still processed.
        if page.desc is None:
            if page.type is None:
                page.extractPage = False
                page.Warning = 'No short description available, cannot infer from the type'
                continue
            page.Warning = 'No short description available; could infer from the type and name'

        # Try to determine where the parameter and body sections of the page
        # begin. funcpointer, proto, and struct pages infer both; other
        # pages infer only the body and have no parameter section.
        if page.include is None:
            page.Warning = 'Page does not have an API definition include::'
        elif page.type in ('funcpointers', 'protos', 'structs'):
            page.param = nextPara(file, page.include)
            if page.body is None:
                page.body = nextPara(file, page.param)
        elif page.body is None:
            page.body = nextPara(file, page.include)

        # The inferred param and body lines can run past the end of the
        # block if, for example, there is no parameter section.
        page.param = clampToBlock(page.param, page.include, page.end)
        page.body = clampToBlock(page.body, page.param, page.end)

        # .include, .param, and .validity may all still be None here,
        # indicating those sections weren't found.

        logDiag('fixupRefs: after processing,', page.name, 'looks like:')
        printPageInfo(page, file)

    # Now that all the valid pages have been found, try to make some
    # inferences about invalid pages.
    #
    # A reference without a .end whose include lies entirely inside a valid
    # reference is treated as embedded in that reference. This is a double
    # loop over all pages, but the loop depth is small.
    for name in sorted(pageMap):
        page = pageMap[name]

        if page.end is not None:
            continue

        for embedName in sorted(pageMap):
            logDiag('fixupRefs: comparing', page.name, 'to', embedName)
            candidate = pageMap[embedName]

            # Don't check embeddings which are themselves invalid
            if not candidate.extractPage:
                logDiag('Skipping check for embedding in:', candidate.name)
                continue

            if candidate.begin is None or candidate.end is None:
                logDiag('fixupRefs:', name + ':',
                        'can\'t compare to unanchored ref:', candidate.name,
                        'in', specFile, 'at line', page.include)
                printPageInfo(page, file)
                printPageInfo(candidate, file)
            elif (page.include is not None
                  and candidate.begin <= page.include <= candidate.end):
                # An embedding was found: record it and stop searching
                logDiag('fixupRefs: Found embed for:', name,
                        'inside:', embedName,
                        'in', specFile, 'at line', page.include)
                page.embed = candidate.name
                page.Warning = 'Embedded in definition for ' + candidate.name
                break
            else:
                logDiag('fixupRefs: No embed match for:', name,
                        'inside:', embedName, 'in', specFile,
                        'at line', page.include)
367  
368  
# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.
INCSVAR_DEF = re.compile(r':INCS-VAR: (?P<value>.*)')
endifPat   = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]')
beginPat   = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]')
# attribute key/value pairs of an open block; the value is single-quoted
# and may contain backslash-escaped characters
attribStr  = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'"
attribPat  = re.compile(attribStr)
bodyPat    = re.compile(r'^// *refBody')
errorPat   = re.compile(r'^// *refError')

# This regex transplanted from check_spec_links
# It looks for either OpenXR or Vulkan generated file conventions, and for
# the api/validity include (generated_type), protos/struct/etc path
# (category), and API name (entity_name). It could be put into the API
# conventions object.
# The dots in '../' and '.txt' are escaped so they match only a literal
# '.', not an arbitrary character.
INCLUDE = re.compile(
        r'include::(?P<directory_traverse>((\.\./){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.txt[\[][\]]')
387  
388  
def findRefs(file, filename):
    """Identify reference pages in a list of strings.

    file - list of strings making up the file; lines containing
        '{INCS-VAR}' are rewritten in place once an ':INCS-VAR:' definition
        has been seen
    filename - name of the file, used for log messages

    Returns a dictionary mapping page names to pageInfo entries for each
    ref page found.
    An error reported through logErr (a nested open block, or an open
    block still unclosed at end of file) propagates as the exception
    logErr raises.
    """
    setLogSourcefile(filename)
    setLogProcname('findRefs')
    try:
        return _scanRefPages(file, filename)
    finally:
        # Clear the log metadata even when an error propagates, so later
        # messages are not attributed to this file.
        setLogSourcefile(None)
        setLogProcname(None)
        setLogLine(None)


def _scanRefPages(file, filename):
    """Scan file line by line and build the page map returned by findRefs."""
    # To reliably detect the open blocks around reference pages, we must
    # first detect the '[open,refpage=...]' markup delimiting the block;
    # skip past the '--' block delimiter on the next line; and identify the
    # '--' block delimiter closing the page.
    # This can't be done solely with pattern matching, and requires state to
    # track 'inside/outside block'.
    # When looking for open blocks, possible states are:
    #   'outside' - outside a block
    #   'start' - have found the '[open...]' line
    #   'inside' - have found the following '--' line
    openBlockState = 'outside'

    # Dictionary of interesting line numbers and strings related to an API
    # name
    pageMap = {}

    numLines = len(file)
    line = 0

    # Track the pageInfo object corresponding to the current open block
    pi = None
    incsvar = None

    while (line < numLines):
        setLogLine(line)

        # Look for a file-wide definition
        matches = INCSVAR_DEF.match(file[line])
        if matches:
            incsvar = matches.group('value')
            logDiag('Matched INCS-VAR definition:', incsvar)

            line = line + 1
            continue

        # Perform INCS-VAR substitution immediately.
        if incsvar and '{INCS-VAR}' in file[line]:
            newLine = file[line].replace('{INCS-VAR}', incsvar)
            logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine)
            file[line] = newLine

        # Only one of the patterns can possibly match. Add it to
        # the dictionary for that name.

        # [open,refpage=...] starting a refpage block
        matches = beginPat.search(file[line])
        if matches is not None:
            logDiag('Matched open block pattern')
            attribs = matches.group('attribs')

            # If the previous open block wasn't closed, raise an error
            if openBlockState != 'outside':
                logErr('Nested open block starting at line', line, 'of',
                       filename)

            openBlockState = 'start'

            # Parse the block attributes
            matches = attribPat.findall(attribs)

            # Extract each attribute
            name = None
            desc = None
            refpage_type = None
            spec_type = None
            anchor = None
            xrefs = None

            for (key,value) in matches:
                logDiag('got attribute', key, '=', value)
                if key == 'refpage':
                    name = value
                elif key == 'desc':
                    desc = unescapeQuotes(value)
                elif key == 'type':
                    refpage_type = value
                elif key == 'spec':
                    spec_type = value
                elif key == 'anchor':
                    anchor = value
                elif key == 'xrefs':
                    xrefs = value
                else:
                    logWarn('unknown open block attribute:', key)

            if name is None or desc is None or refpage_type is None:
                # The two literals are concatenated, so the first must end
                # with a space to keep the message readable.
                logWarn('missing one or more required open block attributes: '
                        'refpage, desc, or type')
                # Leave pi is None so open block delimiters are ignored
            else:
                pi = lookupPage(pageMap, name)
                pi.desc = desc
                # Must match later type definitions in interface/validity includes
                pi.type = refpage_type
                pi.spec = spec_type
                pi.anchor = anchor
                if xrefs:
                    pi.refs = xrefs
                logDiag('open block for', name, 'added DESC =', desc,
                        'TYPE =', refpage_type, 'XREFS =', xrefs,
                        'SPEC =', spec_type, 'ANCHOR =', anchor)

            line = line + 1
            continue

        # '--' starting or ending an open block
        if file[line].rstrip() == '--':
            if openBlockState == 'outside':
                # Only refpage open blocks should use -- delimiters
                logWarn('Unexpected double-dash block delimiters')
            elif openBlockState == 'start':
                # -- delimiter following [open,refpage=...]
                openBlockState = 'inside'

                if pi is None:
                    logWarn('no pageInfo available for opening -- delimiter')
                else:
                    pi.begin = line + 1
                    logDiag('opening -- delimiter: added BEGIN =', pi.begin)
            elif openBlockState == 'inside':
                # -- delimiter ending an open block
                if pi is None:
                    logWarn('no pageInfo available for closing -- delimiter')
                else:
                    pi.end = line - 1
                    logDiag('closing -- delimiter: added END =', pi.end)

                openBlockState = 'outside'
                pi = None
            else:
                logWarn('unknown openBlockState:', openBlockState)

            line = line + 1
            continue

        matches = INCLUDE.search(file[line])
        if matches is not None:
            # Something got included, not sure what yet.
            gen_type = matches.group('generated_type')
            refpage_type = matches.group('category')
            name = matches.group('entity_name')

            # This will never match in OpenCL
            if gen_type == 'validity':
                logDiag('Matched validity pattern')
                if pi is not None:
                    if pi.type and refpage_type != pi.type:
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.validity = line
                    logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity)
                else:
                    logWarn('validity include:: line NOT inside block')

                line = line + 1
                continue

            if gen_type == 'api':
                logDiag('Matched include pattern')
                if pi is not None:
                    if pi.include is not None:
                        logDiag('found multiple includes for this block')
                    if pi.type and refpage_type != pi.type:
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.include = line
                    logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include)
                else:
                    logWarn('interface include:: line NOT inside block')

                line = line + 1
                continue

            logDiag('ignoring unrecognized include line ', matches.group())

        # Vulkan 1.1 markup allows the last API include construct to be
        # followed by an asciidoctor endif:: construct (and also preceded,
        # at some distance).
        # This looks for endif:: immediately following an include:: line
        # and, if found, moves the include boundary to this line.
        matches = endifPat.search(file[line])
        if matches is not None and pi is not None:
            if pi.include == line - 1:
                logDiag('Matched endif pattern following include; moving include')
                pi.include = line
            else:
                logDiag('Matched endif pattern (not following include)')

            line = line + 1
            continue

        matches = bodyPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refBody pattern')
            if pi is not None:
                pi.body = line
                logDiag('added BODY =', pi.body)
            else:
                logWarn('// refBody line NOT inside block')

            line = line + 1
            continue

        # OpenCL spec uses // refError to tag "validity" (Errors) language,
        # instead of /validity/ includes.
        matches = errorPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refError pattern')
            if pi is not None:
                pi.validity = line
                logDiag('added VALIDITY (refError) =', pi.validity)
            else:
                logWarn('// refError line NOT inside block')

            line = line + 1
            continue

        line = line + 1
        continue

    if pi is not None:
        logErr('Unclosed open block at EOF!')

    return pageMap