reflib.py
#!/usr/bin/python3
#
# Copyright (c) 2016-2019 The Khronos Group Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Utility functions for automatic ref page generation

import io
import re
import sys

# global errFile, warnFile, diagFile

# Destinations for the three message severities. A value of None disables
# output for that severity (see logDiag / logWarn / logErr).
errFile = sys.stderr
warnFile = sys.stdout
diagFile = None

# Optional context included in the prefix of every log message.
logSourcefile = None
logProcname = None
logLine = None

# Remove \' escape sequences in a string (refpage description)
#   s - string possibly containing backslash-escaped single quotes
# Returns s with every \' replaced by a plain '.
def unescapeQuotes(s):
    return s.replace('\\\'', '\'')

# Minimal print()-like helper: writes the str() of each positional argument,
# separated by single spaces, followed by 'end'.
#   file - keyword-only; object with a write() method (default sys.stdout)
#   end  - keyword-only; terminator string (default newline)
# Any other keyword arguments are ignored.
def write(*args, **kwargs ):
    file = kwargs.pop('file',sys.stdout)
    end = kwargs.pop('end','\n')
    file.write(' '.join(str(arg) for arg in args))
    file.write(end)

# Metadata which may be printed (if not None) for diagnostic messages
def setLogSourcefile(filename):
    global logSourcefile
    logSourcefile = filename

def setLogProcname(procname):
    global logProcname
    logProcname = procname

def setLogLine(line):
    global logLine
    logLine = line

# Generate prefix for a diagnostic line using metadata and severity
#   severity - short tag such as 'DIAG', 'WARN' or 'ERROR'
# Returns '<severity>: ' followed by whichever of the procedure name, source
# file and line number are currently set, and a trailing space.
def logHeader(severity):
    msg = severity + ': '
    if logProcname:
        msg = msg + ' in ' + logProcname
    if logSourcefile:
        msg = msg + ' for ' + logSourcefile
    # Compare against None explicitly: line 0 is a valid line number and
    # must not be dropped from the prefix just because it is falsy.
    if logLine is not None:
        msg = msg + ' line ' + str(logLine)
    return msg + ' '

# Set the file handle to log either or both warnings and diagnostics to.
# setDiag and setWarn are True if the corresponding handle is to be set.
# filename is None for no logging, '-' for stdout, or a pathname.
# When filename is None the current handles are left unchanged. A pathname
# is opened for writing as UTF-8; the handle is stored in the module globals
# and is not closed by this function.
def setLogFile(setDiag, setWarn, filename):
    global diagFile, warnFile

    if filename is None:
        return

    if filename == '-':
        fp = sys.stdout
    else:
        fp = open(filename, 'w', encoding='utf-8')

    if setDiag:
        diagFile = fp
    if setWarn:
        warnFile = fp

# Build the full text of one log message: the logHeader() prefix for
# 'severity', the space-joined str() of each item in 'args', then 'end'.
def _formatLogMessage(severity, args, end):
    return logHeader(severity) + ' '.join(str(arg) for arg in args) + end

# Write a diagnostic message.
#   file - keyword-only; destination (default: the current diagFile)
#   end  - keyword-only; terminator (default newline)
# Nothing is written when the destination is None.
def logDiag(*args, **kwargs):
    file = kwargs.pop('file', diagFile)
    end = kwargs.pop('end','\n')
    if file is not None:
        file.write(_formatLogMessage('DIAG', args, end))

# Write a warning message.
#   file - keyword-only; destination (default: the current warnFile)
#   end  - keyword-only; terminator (default newline)
# Nothing is written when the destination is None.
def logWarn(*args, **kwargs):
    file = kwargs.pop('file', warnFile)
    end = kwargs.pop('end','\n')
    if file is not None:
        file.write(_formatLogMessage('WARN', args, end))

# Write an error message and then abort by raising an exception.
#   file - keyword-only; destination (default: the current errFile)
#   end  - keyword-only; terminator (default newline)
# Always raises UserWarning carrying the formatted message, whether or not
# the message was written to a file.
def logErr(*args, **kwargs):
    file = kwargs.pop('file', errFile)
    end = kwargs.pop('end','\n')

    message = _formatLogMessage('ERROR', args, end)

    if file is not None:
        file.write(message)
    raise UserWarning(message)

# Return True if s is nothing but white space, False otherwise
# (the empty string counts as white space only).
def isempty(s):
    return not s.strip()

# pageInfo - information about a ref page relative to the file it's
# extracted from.
#
# extractPage - True if page should be extracted
# Warning - string warning if page is suboptimal or can't be generated
# embed - False or the name of the ref page this include is embedded within
#
# type - 'structs', 'protos', 'funcpointers', 'flags', 'enums'
# name - struct/proto/enumerant/etc. name
# desc - short description of ref page
# begin - index of first line of the page (heuristic or // refBegin)
# include - index of include:: line defining the page
# param - index of first line of parameter/member definitions
# body - index of first line of body text
# validity - index of validity include
# end - index of last line of the page (heuristic validity include, or // refEnd)
# refs - cross-references on // refEnd line, if supplied
# spec - 'spec' attribute in refpage open block, if supplied, or None
#        for the default ('api') type
# anchor - 'anchor' attribute in refpage open block, if supplied, or
#          inferred to be the same as the 'name'
class pageInfo:
    def __init__(self):
        self.extractPage = True
        self.Warning = None
        self.embed = False

        self.type = None
        self.name = None
        self.desc = None
        self.begin = None
        self.include = None
        self.param = None
        self.body = None
        self.validity = None
        self.end = None
        self.refs = ''
        self.spec = None
        self.anchor = None

# Print a single field of a pageInfo struct, possibly None
#   desc - string description of field
#   line - field value or None
#   file - indexed by line
# The line number is logged one-based, followed by the text of that line.
def printPageInfoField(desc, line, file):
    if line is not None:
        # file[line] is logged with end='' because the lines are expected to
        # carry their own newline (as produced by readlines()).
        logDiag(desc + ':', line + 1, '\t-> ', file[line], end='')
    else:
        logDiag(desc + ':', line)

# Print out fields of a pageInfo struct
#   pi - pageInfo
#   file - indexed by pageInfo
def printPageInfo(pi, file):
    logDiag('TYPE: ', pi.type)
    logDiag('NAME: ', pi.name)
    logDiag('WARNING:', pi.Warning)
    logDiag('EXTRACT:', pi.extractPage)
    logDiag('EMBED: ', pi.embed)
    logDiag('DESC: ', pi.desc)
    printPageInfoField('BEGIN ', pi.begin, file)
    printPageInfoField('INCLUDE ', pi.include, file)
    printPageInfoField('PARAM ', pi.param, file)
    printPageInfoField('BODY ', pi.body, file)
    printPageInfoField('VALIDITY', pi.validity, file)
    printPageInfoField('END ', pi.end, file)
    logDiag('REFS: "' + pi.refs + '"')

# Go back one paragraph from the specified line and return the line number
# of the first line of that paragraph.
#
# Paragraphs are delimited by blank lines. It is assumed that the
# current line is the first line of a paragraph.
#   file is an array of strings
#   line is the starting point (zero-based)
# Returns -1 when there is no earlier paragraph.
def prevPara(file, line):
    # Skip over current paragraph
    while (line >= 0 and not isempty(file[line])):
        line = line - 1
    # Skip over white space
    while (line >= 0 and isempty(file[line])):
        line = line - 1
    # Skip to first line of previous paragraph
    while (line >= 1 and not isempty(file[line-1])):
        line = line - 1
    return line

# Go forward one paragraph from the specified line and return the line
# number of the first line of that paragraph.
#
# Paragraphs are delimited by blank lines. It is assumed that the
# current line is standalone (which is bogus)
#   file is an array of strings
#   line is the starting point (zero-based)
# The search never advances past the last line of the file. A starting point
# at or beyond the last line (including any line for an empty file) is
# returned unchanged.
def nextPara(file, line):
    maxLine = len(file) - 1
    # Use '<' rather than '!=' so an empty file, or a start already past the
    # end, terminates instead of indexing out of range.
    # Skip over current paragraph
    while (line < maxLine and not isempty(file[line])):
        line = line + 1
    # Skip over white space
    while (line < maxLine and isempty(file[line])):
        line = line + 1
    return line

# Return (creating if needed) the pageInfo entry in pageMap for name
#   pageMap - dictionary mapping names to pageInfo objects; updated in place
#   name - key to look up; also stored as the .name of a newly created entry
def lookupPage(pageMap, name):
    if name not in pageMap:
        pi = pageInfo()
        pi.name = name
        pageMap[name] = pi
    return pageMap[name]

# Load a file into a list of strings.
# Return the list or None on failure
#   filename - path of a UTF-8 text file
# Each list element is one line of the file, including its line terminator.
# If the file cannot be opened a warning is logged and None is returned;
# errors raised while reading or decoding the contents are not caught.
def loadFile(filename):
    try:
        fp = open(filename, 'r', encoding='utf-8')
    except OSError as err:
        # Only I/O failures are treated as "cannot open"; a bare except here
        # would also swallow KeyboardInterrupt and programming errors.
        logWarn('Cannot open file', filename, ':', type(err))
        return None

    # The context manager closes the handle even if reading fails.
    with fp:
        return fp.readlines()

# Clamp a line number to be in the range [minline,maxline].
# If the line number is None, just return it.
# If minline is None, don't clamp to that value.
def clampToBlock(line, minline, maxline):
    if line is None:
        return line
    # Test against None explicitly so that a lower bound of 0 is honored.
    if minline is not None and line < minline:
        return minline
    if line > maxline:
        return maxline

    return line

# Fill in missing fields in pageInfo structures, to the extent they can be
# inferred.
#   pageMap - dictionary of pageInfo structures
#   specFile - filename
#   file - list of strings making up the file, indexed by pageInfo
# The pageInfo objects are updated in place; nothing is returned.
def fixupRefs(pageMap, specFile, file):
    # Keys are not added or removed below, so sort them once for both passes.
    pageNames = sorted(pageMap)

    # All potential ref pages are now in pageMap. Process them to
    # identify actual page start/end/description boundaries, if
    # not already determined from the text.
    for name in pageNames:
        pi = pageMap[name]

        # # If nothing is found but an include line with no begin, validity,
        # # or end, this is not intended as a ref page (yet). Set the begin
        # # line to the include line, so autogeneration can at least
        # # pull the include out, but mark it not to be extracted.
        # # Examples include the host sync table includes in
        # # chapters/fundamentals.txt and the table of Vk*Flag types in
        # # appendices/boilerplate.txt.
        # if pi.begin is None and pi.validity is None and pi.end is None:
        #     pi.begin = pi.include
        #     pi.extractPage = False
        #     pi.Warning = 'No begin, validity, or end lines identified'
        #     continue

        # Using open block delimiters, ref pages must *always* have a
        # defined begin and end. If either is undefined, that's fatal.
        if pi.begin is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify begin of ref page open block'
            continue

        if pi.end is None:
            pi.extractPage = False
            pi.Warning = 'Can\'t identify end of ref page open block'
            continue

        # If there's no description of the page, infer one from the type
        if pi.desc is None:
            if pi.type is not None:
                # pi.desc = pi.type[0:len(pi.type)-1] + ' (no short description available)'
                pi.Warning = 'No short description available; could infer from the type and name'
            else:
                pi.extractPage = False
                pi.Warning = 'No short description available, cannot infer from the type'
                continue

        # Try to determine where the parameter and body sections of the page
        # begin. funcpointer, proto, and struct pages infer the location of
        # the parameter and body sections. Other pages infer the location of
        # the body, but have no parameter sections.
        if pi.include is not None:
            if pi.type in ['funcpointers', 'protos', 'structs']:
                pi.param = nextPara(file, pi.include)
                if pi.body is None:
                    pi.body = nextPara(file, pi.param)
            else:
                if pi.body is None:
                    pi.body = nextPara(file, pi.include)
        else:
            pi.Warning = 'Page does not have an API definition include::'

        # It's possible for the inferred param and body lines to run past
        # the end of block, if, for example, there is no parameter section.
        pi.param = clampToBlock(pi.param, pi.include, pi.end)
        pi.body = clampToBlock(pi.body, pi.param, pi.end)

        # We can get to this point with .include, .param, and .validity
        # all being None, indicating those sections weren't found.

        logDiag('fixupRefs: after processing,', pi.name, 'looks like:')
        printPageInfo(pi, file)

    # Now that all the valid pages have been found, try to make some
    # inferences about invalid pages.
    #
    # If a reference without a .end is entirely inside a valid reference,
    # then it's intentionally embedded - may want to create an indirect
    # page that links into the embedding page. This is done by a very
    # inefficient double loop, but the loop depth is small.
    for name in pageNames:
        pi = pageMap[name]

        if pi.end is not None:
            continue

        for embedName in pageNames:
            logDiag('fixupRefs: comparing', pi.name, 'to', embedName)
            embed = pageMap[embedName]
            # Don't check embeddings which are themselves invalid
            if not embed.extractPage:
                logDiag('Skipping check for embedding in:', embed.name)
                continue
            if embed.begin is None or embed.end is None:
                logDiag('fixupRefs:', name + ':',
                        'can\'t compare to unanchored ref:', embed.name,
                        'in', specFile, 'at line', pi.include )
                printPageInfo(pi, file)
                printPageInfo(embed, file)
            # If an embed is found, change the error to a warning
            elif (pi.include is not None and
                  embed.begin <= pi.include <= embed.end):
                logDiag('fixupRefs: Found embed for:', name,
                        'inside:', embedName,
                        'in', specFile, 'at line', pi.include )
                pi.embed = embed.name
                pi.Warning = 'Embedded in definition for ' + embed.name
                break
            else:
                logDiag('fixupRefs: No embed match for:', name,
                        'inside:', embedName, 'in', specFile,
                        'at line', pi.include)


# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.
INCSVAR_DEF = re.compile(r':INCS-VAR: (?P<value>.*)')
endifPat = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]')
beginPat = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]')
# attribute key/value pairs of an open block
attribStr = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'"
attribPat = re.compile(attribStr)
bodyPat = re.compile(r'^// *refBody')
errorPat = re.compile(r'^// *refError')

# This regex transplanted from check_spec_links
# It looks for either OpenXR or Vulkan generated file conventions, and for
# the api/validity include (generated_type), protos/struct/etc path
# (category), and API name (entity_name). It could be put into the API
# conventions object.
# The dots in '../' and '.txt' are escaped so they match only a literal '.'.
INCLUDE = re.compile(
    r'include::(?P<directory_traverse>((\.\./){1,4}|\{INCS-VAR\}/|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.txt[\[][\]]')


# Identify reference pages in a list of strings, returning a dictionary of
# pageInfo entries for each one found.
#   file - list of lines; lines containing '{INCS-VAR}' are rewritten in
#          place once an ':INCS-VAR:' definition has been seen
#   filename - name used as context in log messages
# Raises UserWarning (via logErr) on a nested open block or on an open
# block still unclosed at end of file. The log context (source file,
# procedure name, line) is cleared on return, including when raising.
def findRefs(file, filename):
    setLogSourcefile(filename)
    setLogProcname('findRefs')

    try:
        return _scanRefs(file, filename)
    finally:
        # logErr raises, so do the reset here to avoid leaving stale
        # file/line context for later log messages.
        setLogSourcefile(None)
        setLogProcname(None)
        setLogLine(None)

# Single pass over 'file' doing the actual work of findRefs; returns the
# dictionary mapping ref page names to pageInfo objects.
def _scanRefs(file, filename):
    # To reliably detect the open blocks around reference pages, we must
    # first detect the '[open,refpage=...]' markup delimiting the block;
    # skip past the '--' block delimiter on the next line; and identify the
    # '--' block delimiter closing the page.
    # This can't be done solely with pattern matching, and requires state to
    # track 'inside/outside block'.
    # When looking for open blocks, possible states are:
    #   'outside' - outside a block
    #   'start' - have found the '[open...]' line
    #   'inside' - have found the following '--' line
    openBlockState = 'outside'

    # Dictionary of interesting line numbers and strings related to an API
    # name
    pageMap = {}

    # Track the pageInfo object corresponding to the current open block
    pi = None
    incsvar = None

    # Line numbers are zero-based indices into 'file'. Each line is handled
    # by the first recognizer that claims it; 'continue' moves to the next.
    for line in range(len(file)):
        setLogLine(line)

        # Look for a file-wide definition
        matches = INCSVAR_DEF.match(file[line])
        if matches:
            incsvar = matches.group('value')
            logDiag('Matched INCS-VAR definition:', incsvar)
            continue

        # Perform INCS-VAR substitution immediately.
        if incsvar and '{INCS-VAR}' in file[line]:
            newLine = file[line].replace('{INCS-VAR}', incsvar)
            logDiag('PERFORMING SUBSTITUTION', file[line], '->', newLine)
            file[line] = newLine

        # Only one of the patterns can possibly match. Add it to
        # the dictionary for that name.

        # [open,refpage=...] starting a refpage block
        matches = beginPat.search(file[line])
        if matches is not None:
            logDiag('Matched open block pattern')
            attribs = matches.group('attribs')

            # If the previous open block wasn't closed, raise an error
            if openBlockState != 'outside':
                logErr('Nested open block starting at line', line, 'of',
                       filename)

            openBlockState = 'start'

            # Parse the block attributes
            matches = attribPat.findall(attribs)

            # Extract each attribute
            name = None
            desc = None
            refpage_type = None
            spec_type = None
            anchor = None
            xrefs = None

            for (key,value) in matches:
                logDiag('got attribute', key, '=', value)
                if key == 'refpage':
                    name = value
                elif key == 'desc':
                    desc = unescapeQuotes(value)
                elif key == 'type':
                    refpage_type = value
                elif key == 'spec':
                    spec_type = value
                elif key == 'anchor':
                    anchor = value
                elif key == 'xrefs':
                    xrefs = value
                else:
                    logWarn('unknown open block attribute:', key)

            if name is None or desc is None or refpage_type is None:
                logWarn('missing one or more required open block attributes: '
                        'refpage, desc, or type')
                # Leave pi as None so open block delimiters are ignored
            else:
                pi = lookupPage(pageMap, name)
                pi.desc = desc
                # Must match later type definitions in interface/validity includes
                pi.type = refpage_type
                pi.spec = spec_type
                pi.anchor = anchor
                if xrefs:
                    pi.refs = xrefs
                logDiag('open block for', name, 'added DESC =', desc,
                        'TYPE =', refpage_type, 'XREFS =', xrefs,
                        'SPEC =', spec_type, 'ANCHOR =', anchor)

            continue

        # '--' starting or ending an open block
        if file[line].rstrip() == '--':
            if openBlockState == 'outside':
                # Only refpage open blocks should use -- delimiters
                logWarn('Unexpected double-dash block delimiters')
            elif openBlockState == 'start':
                # -- delimiter following [open,refpage=...]
                openBlockState = 'inside'

                if pi is None:
                    logWarn('no pageInfo available for opening -- delimiter')
                else:
                    # The page begins on the line after the delimiter
                    pi.begin = line + 1
                    logDiag('opening -- delimiter: added BEGIN =', pi.begin)
            elif openBlockState == 'inside':
                # -- delimiter ending an open block
                if pi is None:
                    logWarn('no pageInfo available for closing -- delimiter')
                else:
                    # The page ends on the line before the delimiter
                    pi.end = line - 1
                    logDiag('closing -- delimiter: added END =', pi.end)

                openBlockState = 'outside'
                pi = None
            else:
                logWarn('unknown openBlockState:', openBlockState)

            continue

        matches = INCLUDE.search(file[line])
        if matches is not None:
            # Something got included, not sure what yet.
            gen_type = matches.group('generated_type')
            refpage_type = matches.group('category')
            name = matches.group('entity_name')

            # This will never match in OpenCL
            if gen_type == 'validity':
                logDiag('Matched validity pattern')
                if pi is not None:
                    if pi.type and refpage_type != pi.type:
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.validity = line
                    logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity)
                else:
                    logWarn('validity include:: line NOT inside block')

                continue

            if gen_type == 'api':
                logDiag('Matched include pattern')
                if pi is not None:
                    if pi.include is not None:
                        logDiag('found multiple includes for this block')
                    if pi.type and refpage_type != pi.type:
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.include = line
                    logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include)
                else:
                    logWarn('interface include:: line NOT inside block')

                continue

            # Any other include falls through to the remaining checks below
            logDiag('ignoring unrecognized include line ', matches.group())

        # Vulkan 1.1 markup allows the last API include construct to be
        # followed by an asciidoctor endif:: construct (and also preceded,
        # at some distance).
        # This looks for endif:: immediately following an include:: line
        # and, if found, moves the include boundary to this line.
        matches = endifPat.search(file[line])
        if matches is not None and pi is not None:
            if pi.include == line - 1:
                logDiag('Matched endif pattern following include; moving include')
                pi.include = line
            else:
                logDiag('Matched endif pattern (not following include)')

            continue

        matches = bodyPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refBody pattern')
            if pi is not None:
                pi.body = line
                logDiag('added BODY =', pi.body)
            else:
                logWarn('// refBody line NOT inside block')

            continue

        # OpenCL spec uses // refError to tag "validity" (Errors) language,
        # instead of /validity/ includes.
        matches = errorPat.search(file[line])
        if matches is not None:
            logDiag('Matched // refError pattern')
            if pi is not None:
                pi.validity = line
                logDiag('added VALIDITY (refError) =', pi.validity)
            else:
                logWarn('// refError line NOT inside block')

            continue

    if pi is not None:
        logErr('Unclosed open block at EOF!')

    return pageMap