check_callgraph.py
#!/usr/bin/env python
#
# Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Find references between ELF sections by combining RTL dump files with an ELF file.

RTL files provide, for every function, the names of the functions it calls and
the symbols it references. The ELF file provides the address (and therefore the
section) of every symbol. Joining the two gives a list of
"symbol A in section X refers to symbol B in section Y" records, which can be
printed in full ("all-refs") or filtered by section ("find-refs").
"""

import argparse
from functools import partial
import os
import re

import elftools
from elftools.elf import elffile

try:
    from typing import List, Optional, BinaryIO, Tuple, Generator, Dict, Callable
except ImportError:
    pass

# ";; Function <mangled name> (<function>, ...)" header line of a function in an RTL file
FUNCTION_REGEX = re.compile(
    r"^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$"
)
# RTL line containing a "(call ..." expression with a quoted target name
CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
# RTL line containing a '(symbol_ref ... ("name")' expression
SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')


class RtlFunction(object):
    """
    A function found in an RTL file, with the names it calls and references.
    """
    def __init__(self, name, rtl_filename, tu_filename):
        self.name = name
        self.rtl_filename = rtl_filename
        self.tu_filename = tu_filename
        self.calls = list()  # type: List[str]
        self.refs = list()  # type: List[str]
        self.sym = None


class SectionAddressRange(object):
    """
    Named half-open address range [low, high) occupied by a section.
    """
    def __init__(self, name, addr, size):  # type: (str, int, int) -> None
        self.name = name
        self.low = addr
        self.high = addr + size

    def __str__(self):
        return "{}: 0x{:08x} - 0x{:08x}".format(self.name, self.low, self.high)

    def contains_address(self, addr):
        """ Return True if addr lies inside the range (upper bound is exclusive). """
        return self.low <= addr < self.high


# Extra address ranges, keyed by the value of the IDF_TARGET environment variable.
# They are appended to the section list loaded from the ELF file (see ElfInfo._load_sections).
TARGET_SECTIONS = {
    "esp32": [
        SectionAddressRange(".rom.text", 0x40000000, 0x70000),
        SectionAddressRange(".rom.rodata", 0x3ff96000, 0x9018)
    ],
    "esp32s2": [
        SectionAddressRange(".rom.text", 0x40000000, 0x1bed0),
        SectionAddressRange(".rom.rodata", 0x3ffac600, 0x392c)
    ],
    "esp32s3": [
        SectionAddressRange(".rom.text", 0x40000000, 0x568d0),
        SectionAddressRange(".rom.rodata", 0x3ff071c0, 0x8e30)
    ]
}  # type: Dict[str, List[SectionAddressRange]]


class Symbol(object):
    """
    ELF symbol together with the symbols it refers to and is referred from.
    """
    def __init__(self, name, addr, local, filename, section):  # type: (str, int, bool, Optional[str], Optional[str]) -> None
        self.name = name
        self.addr = addr
        self.local = local
        self.filename = filename
        self.section = section
        self.refers_to = list()  # type: List[Symbol]
        self.referred_from = list()  # type: List[Symbol]

    def __str__(self):
        return "{} @0x{:08x} [{}]{} {}".format(
            self.name,
            self.addr,
            self.section or "unknown",
            " (local)" if self.local else "",
            self.filename
        )


class Reference(object):
    """
    A single reference from one symbol to another.
    """
    def __init__(self, from_sym, to_sym):  # type: (Symbol, Symbol) -> None
        self.from_sym = from_sym
        self.to_sym = to_sym

    def __str__(self):
        return "{} @0x{:08x} ({}) -> {} @0x{:08x} ({})".format(
            self.from_sym.name,
            self.from_sym.addr,
            self.from_sym.section,
            self.to_sym.name,
            self.to_sym.addr,
            self.to_sym.section
        )


class ElfInfo(object):
    """
    Section address ranges and symbols loaded from an ELF file.
    """
    def __init__(self, elf_file):  # type: (BinaryIO) -> None
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Sections have to be loaded first: _load_symbols uses section_for_addr.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()

    def _load_symbols(self):  # type: () -> List[Symbol]
        """
        Collect STT_NOTYPE, STT_FUNC and STT_OBJECT symbols from all symbol tables.
        """
        symbols = []
        for s in self.elf_obj.iter_sections():
            if not isinstance(s, elftools.elf.sections.SymbolTableSection):
                continue
            # Name of the most recent STT_FILE symbol; recorded only for local symbols.
            filename = None
            for sym in s.iter_symbols():
                sym_type = sym.entry["st_info"]["type"]
                if sym_type == "STT_FILE":
                    filename = sym.name
                if sym_type in ["STT_NOTYPE", "STT_FUNC", "STT_OBJECT"]:
                    local = sym.entry["st_info"]["bind"] == "STB_LOCAL"
                    addr = sym.entry["st_value"]
                    symbols.append(
                        Symbol(
                            sym.name,
                            addr,
                            local,
                            filename if local else None,
                            self.section_for_addr(addr),
                        )
                    )
        return symbols

    def _load_sections(self):  # type: () -> List[SectionAddressRange]
        """
        Collect address ranges of the sections which belong to PT_LOAD segments,
        plus the ranges from TARGET_SECTIONS for the target named by IDF_TARGET, if any.
        """
        result = []
        for segment in self.elf_obj.iter_segments():
            if segment["p_type"] == "PT_LOAD":
                for section in self.elf_obj.iter_sections():
                    if not segment.section_in_segment(section):
                        continue
                    result.append(
                        SectionAddressRange(
                            section.name, section["sh_addr"], section["sh_size"]
                        )
                    )

        target = os.environ.get("IDF_TARGET")
        if target in TARGET_SECTIONS:
            result += TARGET_SECTIONS[target]

        return result

    def symbols_by_name(self, name):  # type: (str) -> List[Symbol]
        """ Return all the symbols with the given name, in the order they were loaded. """
        return [sym for sym in self.symbols if sym.name == name]

    def section_for_addr(self, sym_addr):  # type: (int) -> Optional[str]
        """ Return the name of the first section range containing sym_addr, or None. """
        for sar in self.section_ranges:
            if sar.contains_address(sym_addr):
                return sar.name
        return None


def load_rtl_file(rtl_filename, tu_filename, functions):  # type: (str, str, List[RtlFunction]) -> None
    """
    Parse one RTL file and append an RtlFunction to 'functions' for every function found in it.
    Call targets and symbol references are recorded (without duplicates) in the function
    whose header precedes them in the file.
    """
    last_function = None  # type: Optional[RtlFunction]
    # Use a context manager, so that the file is closed as soon as it has been parsed.
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            match = FUNCTION_REGEX.match(line)
            if match:
                function_name = match.group("function")
                last_function = RtlFunction(function_name, rtl_filename, tu_filename)
                functions.append(last_function)
                continue

            if last_function:
                # Find direct function calls
                match = CALL_REGEX.match(line)
                if match:
                    target = match.group("target")
                    if target not in last_function.calls:
                        last_function.calls.append(target)
                    continue

                # Find symbol references
                match = SYMBOL_REF_REGEX.match(line)
                if match:
                    target = match.group("target")
                    if target not in last_function.refs:
                        last_function.refs.append(target)
                    continue


def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename):  # type: (str, str) -> bool
    # Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
    # RTL file names are paths relative to the build directory, e.g.:
    # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
    #
    # The check below may give a false positive if there are two files with the same name in
    # different directories. This doesn't seem to happen in IDF now, but if it does happen,
    # an assert in find_symbol_by_name should catch this.
    #
    # If this becomes an issue, consider also loading the .map file and using it to figure out
    # which object file was used as the source of each symbol. Names of the object files and RTL files
    # should be much easier to match.
    return os.path.basename(rtl_filename).startswith(symbol_filename)


class SymbolNotFound(RuntimeError):
    pass


def find_symbol_by_name(name, elfinfo, local_func_matcher):  # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
    """
    Find an ELF symbol for the given name.
    local_func_matcher is a callback function which checks if the candidate local symbol is suitable.
    Returns None if there is no symbol with this name, or if none of the candidates is suitable.
    """
    syms = elfinfo.symbols_by_name(name)
    if not syms:
        return None
    if len(syms) == 1:
        return syms[0]
    else:
        # There are multiple symbols with a given name. Find the best fit.
        local_candidate = None
        global_candidate = None
        for sym in syms:
            if not sym.local:
                assert not global_candidate  # can't have two global symbols with the same name
                global_candidate = sym
            elif local_func_matcher(sym):
                assert not local_candidate  # can't have two symbols with the same name in a single file
                local_candidate = sym

        # If two symbols with the same name are defined, a global and a local one,
        # prefer the local symbol as the reference target.
        return local_candidate or global_candidate


def match_local_source_func(rtl_filename, sym):  # type: (str, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
    reference source (caller), based on the RTL file name.
    """
    assert sym.filename  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)


def match_local_target_func(rtl_filename, sym_from, sym):  # type: (str, Symbol, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
    reference target (callee or referenced data), based on RTL filename of the source symbol
    and the source symbol itself.
    """
    assert sym.filename  # should be set for local functions
    if sym_from.local:
        # local symbol referencing another local symbol
        return sym_from.filename == sym.filename
    else:
        # global symbol referencing a local symbol;
        # source filename is not known, use RTL filename as a hint
        return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)


def match_rtl_funcs_to_symbols(rtl_functions, elfinfo):  # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
    """
    Resolve the functions found in the RTL files, and everything they reference, to ELF symbols.
    Returns the list of symbols which take part in at least one lookup (as a source or as a target),
    and the list of references between them. Also fills in refers_to/referred_from of the symbols.
    """
    symbols = []  # type: List[Symbol]
    refs = []  # type: List[Reference]

    # General idea:
    # - iterate over RTL functions.
    #   - for each RTL function, find the corresponding symbol
    #   - iterate over the functions and variables referenced from this RTL function
    #     - find symbols corresponding to the references
    #     - record every pair (sym_from, sym_to) as a Reference object

    for source_rtl_func in rtl_functions:
        maybe_sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename))
        if maybe_sym_from is None:
            # RTL references a symbol, but the symbol is not defined in the generated object file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue
        sym_from = maybe_sym_from

        if sym_from not in symbols:
            symbols.append(sym_from)

        for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
            if "*.LC" in target_rtl_func_name:  # skip local labels
                continue

            maybe_sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from))
            if not maybe_sym_to:
                # This may happen for a extern reference in the RTL file, if the reference was later removed
                # by one of the optimization passes, and the external definition got garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some bug in
                # find_symbol_by_name?..
                continue
            sym_to = maybe_sym_to

            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            refs.append(Reference(sym_from, sym_to))
            if sym_to not in symbols:
                symbols.append(sym_to)

    return symbols, refs


def get_symbols_and_refs(rtl_list, elf_file):  # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
    """
    Load the ELF file and all the RTL files from rtl_list, and return the result of
    match_rtl_funcs_to_symbols for them.
    """
    elfinfo = ElfInfo(elf_file)

    rtl_functions = []  # type: List[RtlFunction]
    for file_name in rtl_list:
        # The RTL file name is also passed as the translation unit file name.
        load_rtl_file(file_name, file_name, rtl_functions)

    return match_rtl_funcs_to_symbols(rtl_functions, elfinfo)


def list_refs_from_to_sections(refs, from_sections, to_sections):  # type: (List[Reference], List[str], List[str]) -> int
    """
    Print the references which originate in one of from_sections and point into one of to_sections.
    An empty list of sections matches any section. Returns the number of references printed.
    """
    found = 0
    for ref in refs:
        if (not from_sections or ref.from_sym.section in from_sections) and \
           (not to_sections or ref.to_sym.section in to_sections):
            print(str(ref))
            found += 1
    return found


def find_files_recursive(root_path, ext):  # type: (str, str) -> Generator[str, None, None]
    """ Yield paths of all the files under root_path whose names end with ext. """
    for root, _, files in os.walk(root_path):
        for basename in files:
            if basename.endswith(ext):
                filename = os.path.join(root, basename)
                yield filename


def main():
    """
    Command line entry point: parse the arguments, load the RTL files and the ELF file,
    then perform the action requested ("find-refs" or "all-refs").
    Raises RuntimeError if no RTL files are given, and SystemExit(1) if "find-refs --exit-code"
    has found at least one reference.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--rtl-list",
        help="File with the list of RTL files",
        type=argparse.FileType("r"),
    )
    parser.add_argument(
        "--rtl-dir", help="Directory where to look for RTL files, recursively"
    )
    parser.add_argument(
        "--elf-file",
        required=True,
        help="Program ELF file",
        type=argparse.FileType("rb"),
    )
    action_sub = parser.add_subparsers(dest="action")
    find_refs_parser = action_sub.add_parser(
        "find-refs",
        help="List the references coming from a given list of source sections "
             "to a given list of target sections.",
    )
    find_refs_parser.add_argument(
        "--from-sections", help="comma-separated list of source sections"
    )
    find_refs_parser.add_argument(
        "--to-sections", help="comma-separated list of target sections"
    )
    find_refs_parser.add_argument(
        "--exit-code",
        action="store_true",
        help="If set, exits with non-zero code when any references found",
    )
    action_sub.add_parser(
        "all-refs",
        help="Print the list of all references",
    )

    args = parser.parse_args()
    if args.rtl_list:
        # argparse.FileType has already opened the file: args.rtl_list is a file object,
        # not a path, so it must not be passed to open() again.
        with args.rtl_list as rtl_list_file:
            # One RTL file name per line; blank lines are skipped.
            rtl_list = [line.strip() for line in rtl_list_file if line.strip()]
    else:
        if not args.rtl_dir:
            raise RuntimeError("Either --rtl-list or --rtl-dir must be specified")
        rtl_list = list(find_files_recursive(args.rtl_dir, ".expand"))

    if not rtl_list:
        raise RuntimeError("No RTL files specified")

    try:
        _, refs = get_symbols_and_refs(rtl_list, args.elf_file)
    finally:
        # The ELF file was opened by argparse.FileType; it is not needed past this point.
        args.elf_file.close()

    if args.action == "find-refs":
        from_sections = args.from_sections.split(",") if args.from_sections else []
        to_sections = args.to_sections.split(",") if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == "all-refs":
        for r in refs:
            print(str(r))


if __name__ == "__main__":
    main()