# macro_checker.py
"""Provides the MacroChecker class."""

# Copyright (c) 2018-2019 Collabora, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author(s):    Ryan Pavlik <ryan.pavlik@collabora.com>

from io import StringIO
import re


class _FileStreamMaker(object):
    """Stream factory that opens a named file as UTF-8 text on demand."""

    def __init__(self, filename):
        self.filename = filename

    def make_stream(self):
        """Open and return a new read-only text stream; the caller closes it."""
        return open(self.filename, 'r', encoding='utf-8')


class _StringStreamMaker(object):
    """Stream factory that serves an in-memory string as a text stream."""

    def __init__(self, string):
        self.string = string

    def make_stream(self):
        """Return a fresh StringIO positioned at the start of the string."""
        return StringIO(self.string)


class MacroChecker(object):
    """Perform and track checking of one or more files in an API spec.

    This does not necessarily need to be subclassed per-API: it is sufficiently
    parameterized in the constructor for expected usage.
    """

    def __init__(self, enabled_messages, entity_db,
                 macro_checker_file_type, root_path):
        """Construct an object that tracks checking one or more files in an API spec.

        enabled_messages -- a set of MessageId that should be enabled.
        entity_db -- an object of a EntityDatabase subclass for this API.
        macro_checker_file_type -- Type to instantiate to create the right
                                   MacroCheckerFile subclass for this API.
        root_path -- A Path object for the root of this repository.
        """
        self.enabled_messages = enabled_messages
        self.entity_db = entity_db
        self.macro_checker_file_type = macro_checker_file_type
        self.root_path = root_path

        # One entry per processed file/string, in processing order.
        self.files = []

        # Entity names for which a ref-page markup block has been seen.
        self.refpages = set()

        # keys: entity names. values: MessageContext
        # (for self.links, each value is a list of contexts: one per
        # linking usage recorded through addLinkToEntity).
        self.links = {}
        self.apiIncludes = {}
        self.validityIncludes = {}
        self.headings = {}

        # Regexes that are members because they depend on the name prefix.

        # apiPrefix, followed by some word characters or * as many times as desired,
        # NOT followed by >> and NOT preceded by one of the characters in that first character class.
        # (which distinguish "names being used somewhere other than prose").
        self.suspected_missing_macro_re = re.compile(
            r'\b(?<![-=:/[\.`+,])(?P<entity_name>{}[\w*]+)\b(?!>>)'.format(
                self.entity_db.case_insensitive_name_prefix_pattern)
        )
        self.heading_command_re = re.compile(
            r'=+ (?P<command>{}[\w]+)'.format(self.entity_db.name_prefix)
        )

        macros_pattern = '|'.join((re.escape(macro)
                                   for macro in self.entity_db.macros))
        # the "formatting" group is to strip matching */**/_/__
        # surrounding an entire macro.
        self.macro_re = re.compile(
            r'(?P<formatting>\**|_*)(?P<macro>{}):(?P<entity_name>[\w*]+((?P<subscript>[\[][^\]]*[\]]))?)(?P=formatting)'.format(macros_pattern))

    def haveLinkTarget(self, entity):
        """Report if we have parsed an API include (or heading) for an entity.

        None if there is no entity with that name.
        """
        if not self.findEntity(entity):
            return None
        if entity in self.apiIncludes:
            return True
        return entity in self.headings

    def hasFixes(self):
        """Report if any files have auto-fixes."""
        return any(f.hasFixes() for f in self.files)

    def addLinkToEntity(self, entity, context):
        """Record seeing a link to an entity's docs from a context."""
        self.links.setdefault(entity, []).append(context)

    def seenRefPage(self, entity):
        """Check if a ref-page markup block has been seen for an entity."""
        return entity in self.refpages

    def addRefPage(self, entity):
        """Record seeing a ref-page markup block for an entity."""
        self.refpages.add(entity)

    def findMacroAndEntity(self, macro, entity):
        """Look up EntityData by macro and entity pair.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findMacroAndEntity(macro, entity)

    def findEntity(self, entity):
        """Look up EntityData by entity name (case-sensitive).

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findEntity(entity)

    def findEntityCaseInsensitive(self, entity):
        """Look up EntityData by entity name (case-insensitive).

        Forwards to the EntityDatabase.
        """
        return self.entity_db.findEntityCaseInsensitive(entity)

    def getMemberNames(self, commandOrStruct):
        """Given a command or struct name, retrieve the names of each member/param.

        Returns an empty list if the entity is not found or doesn't have members/params.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.getMemberNames(commandOrStruct)

    def likelyRecognizedEntity(self, entity_name):
        """Guess (based on name prefix alone) if an entity is likely to be recognized.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.likelyRecognizedEntity(entity_name)

    def isLinkedMacro(self, macro):
        """Identify if a macro is considered a "linked" macro.

        Forwards to the EntityDatabase.
        """
        return self.entity_db.isLinkedMacro(macro)

    def processFile(self, filename):
        """Parse an .adoc file belonging to the spec and check it for errors.

        The file is handed to a new macro_checker_file_type instance through a
        stream maker, processed, and then recorded in self.files.
        """
        f = self.macro_checker_file_type(self, filename, self.enabled_messages,
                                         _FileStreamMaker(filename))
        f.process()
        self.files.append(f)

    def processString(self, s):
        """Process a string as if it were a spec file.

        Used for testing purposes.

        Returns the processed macro_checker_file_type instance, which is also
        appended to self.files.
        """
        if "\n" in s.rstrip():
            # remove leading spaces from each line to allow easier
            # block-quoting in tests
            s = "\n".join((line.lstrip() for line in s.split("\n")))
            # fabricate a "filename" that will display better.
            filename = "string{}\n****START OF STRING****\n{}\n****END OF STRING****\n".format(
                len(self.files), s.rstrip())

        else:
            filename = "string{}: {}".format(
                len(self.files), s.rstrip())

        f = self.macro_checker_file_type(self, filename, self.enabled_messages,
                                         _StringStreamMaker(s))
        f.process()
        self.files.append(f)
        return f

    def numDiagnostics(self):
        """Return the total number of diagnostics (warnings and errors) over all the files processed."""
        return sum((f.numDiagnostics() for f in self.files))

    def numErrors(self):
        """Return the total number of errors over all the files processed."""
        return sum((f.numErrors() for f in self.files))

    def getMissingUnreferencedApiIncludes(self):
        """Return the unreferenced entity names that we expected to see an API include or link target for, but did not.

        Counterpart to getBrokenLinks(): This method returns the entity names
        that were not used in a linking macro (and thus wouldn't create a broken link),
        but were nevertheless expected and not seen.

        The result is a lazy generator over the entity names, not a list.
        """
        return (entity for entity in self.entity_db.generating_entities
                if (not self.haveLinkTarget(entity)) and entity not in self.links)

    def getBrokenLinks(self):
        """Return the entity names and usage contexts that we expected to see an API include or link target for, but did not.

        Counterpart to getMissingUnreferencedApiIncludes(): This method returns only the
        entity names that were used in a linking macro (and thus create a broken link),
        but were not seen. The values of the dictionary are a list of MessageContext objects
        for each linking macro usage for this entity name.
        """
        return {entity: contexts for entity, contexts in self.links.items()
                if self.entity_db.entityGenerates(entity) and not self.haveLinkTarget(entity)}

    def getMissingRefPages(self):
        """Return the entities that we expected, but did not see, a ref page block for.

        The result is a lazy generator yielding entity names in sorted order,
        not a list.

        The heuristics here are rather crude: we expect a ref page for every generating entry.
        """
        return (entity for entity in sorted(self.entity_db.generating_entities)
                if entity not in self.refpages)