/ tools / ci / check_callgraph.py
check_callgraph.py
  1  #!/usr/bin/env python
  2  #
  3  # Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
  4  # Copyright 2020 Espressif Systems (Shanghai) PTE LTD
  5  #
  6  # Licensed under the Apache License, Version 2.0 (the "License");
  7  # you may not use this file except in compliance with the License.
  8  # You may obtain a copy of the License at
  9  #
 10  #     http://www.apache.org/licenses/LICENSE-2.0
 11  #
 12  # Unless required by applicable law or agreed to in writing, software
 13  # distributed under the License is distributed on an "AS IS" BASIS,
 14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  # See the License for the specific language governing permissions and
 16  # limitations under the License.
 17  
 18  import argparse
 19  from functools import partial
 20  import os
 21  import re
 22  
 23  import elftools
 24  from elftools.elf import elffile
 25  
 26  try:
 27      from typing import List, Optional, BinaryIO, Tuple, Generator, Dict, Callable
 28  except ImportError:
 29      pass
 30  
 31  FUNCTION_REGEX = re.compile(
 32      r"^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$"
 33  )
 34  CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
 35  SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
 36  
 37  
 38  class RtlFunction(object):
 39      def __init__(self, name, rtl_filename, tu_filename):
 40          self.name = name
 41          self.rtl_filename = rtl_filename
 42          self.tu_filename = tu_filename
 43          self.calls = list()  # type: List[str]
 44          self.refs = list()  # type: List[str]
 45          self.sym = None
 46  
 47  
 48  class SectionAddressRange(object):
 49      def __init__(self, name, addr, size):  # type: (str, int, int) -> None
 50          self.name = name
 51          self.low = addr
 52          self.high = addr + size
 53  
 54      def __str__(self):
 55          return "{}: 0x{:08x} - 0x{:08x}".format(self.name, self.low, self.high)
 56  
 57      def contains_address(self, addr):
 58          return self.low <= addr < self.high
 59  
 60  
 61  TARGET_SECTIONS = {
 62      "esp32": [
 63          SectionAddressRange(".rom.text", 0x40000000, 0x70000),
 64          SectionAddressRange(".rom.rodata", 0x3ff96000, 0x9018)
 65      ],
 66      "esp32s2": [
 67          SectionAddressRange(".rom.text", 0x40000000, 0x1bed0),
 68          SectionAddressRange(".rom.rodata", 0x3ffac600, 0x392c)
 69      ],
 70      "esp32s3": [
 71          SectionAddressRange(".rom.text", 0x40000000, 0x568d0),
 72          SectionAddressRange(".rom.rodata", 0x3ff071c0, 0x8e30)
 73      ]
 74  }  # type: Dict[str, List[SectionAddressRange]]
 75  
 76  
 77  class Symbol(object):
 78      def __init__(self, name, addr, local, filename, section):  # type: (str, int, bool, Optional[str], Optional[str]) -> None
 79          self.name = name
 80          self.addr = addr
 81          self.local = local
 82          self.filename = filename
 83          self.section = section
 84          self.refers_to = list()  # type: List[Symbol]
 85          self.referred_from = list()  # type: List[Symbol]
 86  
 87      def __str__(self):
 88          return "{} @0x{:08x} [{}]{} {}".format(
 89              self.name,
 90              self.addr,
 91              self.section or "unknown",
 92              " (local)" if self.local else "",
 93              self.filename
 94          )
 95  
 96  
 97  class Reference(object):
 98      def __init__(self, from_sym, to_sym):  # type: (Symbol, Symbol) -> None
 99          self.from_sym = from_sym
100          self.to_sym = to_sym
101  
102      def __str__(self):
103          return "{} @0x{:08x} ({}) -> {} @0x{:08x} ({})".format(
104              self.from_sym.name,
105              self.from_sym.addr,
106              self.from_sym.section,
107              self.to_sym.name,
108              self.to_sym.addr,
109              self.to_sym.section
110          )
111  
112  
class ElfInfo(object):
    """Section address ranges and symbols loaded from an ELF file.

    Attributes:
        elf_file: the binary file object passed to the constructor.
        elf_obj: ELFFile object created from elf_file.
        section_ranges: address ranges used to map an address to a section name.
        symbols: every Symbol loaded from the symbol tables. Treat this list as
            read-only once symbols_by_name() has been called, because the name
            index is built from it only once.
    """

    def __init__(self, elf_file):  # type: (BinaryIO) -> None
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Sections must be loaded first: _load_symbols calls section_for_addr.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()
        # name -> symbols index, built lazily by the first symbols_by_name() call
        self._symbols_index = None  # type: Optional[Dict[str, List[Symbol]]]

    def _load_symbols(self):  # type: () -> List[Symbol]
        """Collect NOTYPE, FUNC and OBJECT symbols from every symbol table section."""
        symbols = []
        for s in self.elf_obj.iter_sections():
            if not isinstance(s, elftools.elf.sections.SymbolTableSection):
                continue
            # The most recent STT_FILE symbol names the file for the symbols after it.
            filename = None
            for sym in s.iter_symbols():
                sym_type = sym.entry["st_info"]["type"]
                if sym_type == "STT_FILE":
                    filename = sym.name
                if sym_type in ["STT_NOTYPE", "STT_FUNC", "STT_OBJECT"]:
                    local = sym.entry["st_info"]["bind"] == "STB_LOCAL"
                    addr = sym.entry["st_value"]
                    symbols.append(
                        Symbol(
                            sym.name,
                            addr,
                            local,
                            # the file name is recorded for local symbols only
                            filename if local else None,
                            self.section_for_addr(addr),
                        )
                    )
        return symbols

    def _load_sections(self):  # type: () -> List[SectionAddressRange]
        """Return ranges of sections in PT_LOAD segments, plus the IDF_TARGET extras."""
        result = []
        for segment in self.elf_obj.iter_segments():
            if segment["p_type"] == "PT_LOAD":
                for section in self.elf_obj.iter_sections():
                    if not segment.section_in_segment(section):
                        continue
                    result.append(
                        SectionAddressRange(
                            section.name, section["sh_addr"], section["sh_size"]
                        )
                    )

        # Add the extra ranges listed for the target named by IDF_TARGET, if any.
        target = os.environ.get("IDF_TARGET")
        if target in TARGET_SECTIONS:
            result += TARGET_SECTIONS[target]

        return result

    def symbols_by_name(self, name):  # type: (str) -> List[Symbol]
        """Return a new list of all symbols called name, in load order.

        The list is empty when no symbol has that name. Lookups use an index
        built from self.symbols on the first call, so repeated lookups do not
        rescan the whole symbol list.
        """
        index = self._symbols_index
        if index is None:
            index = {}
            for sym in self.symbols:
                index.setdefault(sym.name, []).append(sym)
            self._symbols_index = index
        # Copy so that callers cannot modify the index through the result.
        return list(index.get(name, ()))

    def section_for_addr(self, sym_addr):  # type: (int) -> Optional[str]
        """Return the name of the first range containing sym_addr, or None."""
        for sar in self.section_ranges:
            if sar.contains_address(sym_addr):
                return sar.name
        return None
175  
176  
def load_rtl_file(rtl_filename, tu_filename, functions):  # type: (str, str, List[RtlFunction]) -> None
    """
    Parse one RTL file and append an RtlFunction to `functions` for every function header found.

    Calls and symbol references found after a function header are recorded on
    that function, once per distinct name and in first-seen order. Lines before
    the first function header are ignored.
    """
    last_function = None  # type: Optional[RtlFunction]
    # Use a context manager so the file is closed even if parsing raises.
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            match = FUNCTION_REGEX.match(line)
            if match:
                last_function = RtlFunction(match.group("function"), rtl_filename, tu_filename)
                functions.append(last_function)
                continue

            if last_function is None:
                # nothing to attach calls or references to yet
                continue

            # Find direct function calls
            match = CALL_REGEX.match(line)
            if match:
                target = match.group("target")
                if target not in last_function.calls:
                    last_function.calls.append(target)
                continue

            # Find symbol references
            match = SYMBOL_REF_REGEX.match(line)
            if match:
                target = match.group("target")
                if target not in last_function.refs:
                    last_function.refs.append(target)
204  
205  
def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename):  # type: (str, str) -> bool
    """Return True if the base name of rtl_filename starts with symbol_filename."""
    # Symbol file names (from ELF debug info) are bare source file names such as "cpu_start.c".
    # RTL file names are paths relative to the build directory, for example
    # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand",
    # so only the last path component is compared.
    #
    # This can give a false positive when two source files in different directories share a
    # name. That does not seem to happen in IDF today; if it does, the asserts in
    # find_symbol_by_name should catch it.
    #
    # If this becomes an issue, consider also loading the .map file to find out which object
    # file each symbol came from. Object file names should be much easier to match against
    # RTL file names.
    rtl_basename = os.path.basename(rtl_filename)
    return rtl_basename.startswith(symbol_filename)
219  
220  
class SymbolNotFound(RuntimeError):
    """RuntimeError subclass for a symbol lookup that found nothing."""
223  
224  
def find_symbol_by_name(name, elfinfo, local_func_matcher):  # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
    """
    Find the ELF symbol that best matches the given name.

    local_func_matcher is a callback which checks whether a candidate local symbol is suitable.
    It is consulted only when several symbols share the name; a unique symbol is returned as is.
    Returns None when no symbol has the name, or when none of several candidates qualifies.
    """
    candidates = elfinfo.symbols_by_name(name)
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # Several symbols share this name: pick the best fit.
    best_local = None
    best_global = None
    for candidate in candidates:
        if candidate.local:
            if local_func_matcher(candidate):
                assert not best_local  # can't have two symbols with the same name in a single file
                best_local = candidate
        else:
            assert not best_global  # can't have two global symbols with the same name
            best_global = candidate

    # When both a local and a global symbol qualify, the local one wins as the reference target.
    return best_local or best_global
250  
251  
def match_local_source_func(rtl_filename, sym):  # type: (str, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decide whether local symbol sym can be the
    reference source (caller) for a function read from the RTL file rtl_filename.
    The decision is based only on the RTL file name and the symbol's file name.
    """
    sym_filename = sym.filename
    assert sym_filename  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, sym_filename)
259  
260  
def match_local_target_func(rtl_filename, sym_from, sym):  # type: (str, Symbol, Symbol) -> bool
    """
    Helper for match_rtl_funcs_to_symbols: decide whether local symbol sym can be the
    reference target (callee or referenced data) for the source symbol sym_from, which
    was read from the RTL file rtl_filename.
    """
    assert sym.filename  # should be set for local functions
    if not sym_from.local:
        # A global symbol references a local one. The source file name is not
        # known, so the RTL file name is used as a hint instead.
        return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
    # Both symbols are local: they must come from the same file.
    return sym_from.filename == sym.filename
275  
276  
def match_rtl_funcs_to_symbols(rtl_functions, elfinfo):  # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
    """
    Resolve RTL functions and the names they use to ELF symbols.

    Returns a tuple (symbols, refs). `symbols` holds every symbol seen as a
    source or target, each once and in first-seen order. `refs` holds one
    Reference per resolved (source, target) pair. The refers_to and
    referred_from lists of the symbols involved are updated as a side effect.
    """
    symbols = []  # type: List[Symbol]
    refs = []  # type: List[Reference]
    # ids of the objects already in `symbols`. This gives the same identity-based
    # de-duplication as a list membership test without rescanning the list each time.
    # Every symbol stays alive in `symbols`, so an id cannot be reused.
    seen_ids = set()

    def remember(sym):  # type: (Symbol) -> None
        if id(sym) not in seen_ids:
            seen_ids.add(id(sym))
            symbols.append(sym)

    # General idea:
    # - iterate over RTL functions.
    #   - for each RTL function, find the corresponding symbol
    #   - iterate over the functions and variables referenced from this RTL function
    #     - find symbols corresponding to the references
    #     - record every pair (sym_from, sym_to) as a Reference object

    for source_rtl_func in rtl_functions:
        maybe_sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename))
        if maybe_sym_from is None:
            # RTL references a symbol, but the symbol is not defined in the generated object file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue
        sym_from = maybe_sym_from

        remember(sym_from)

        for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
            if "*.LC" in target_rtl_func_name:  # skip local labels
                continue

            maybe_sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from))
            if not maybe_sym_to:
                # This may happen for an extern reference in the RTL file, if the reference was later removed
                # by one of the optimization passes, and the external definition got garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some bug in
                # find_symbol_by_name?..
                continue
            sym_to = maybe_sym_to

            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            refs.append(Reference(sym_from, sym_to))
            remember(sym_to)

    return symbols, refs
320  
321  
def get_symbols_and_refs(rtl_list, elf_file):  # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
    """
    Load the ELF file and every RTL file in rtl_list, then match the RTL functions to ELF symbols.

    Returns the (symbols, references) tuple produced by match_rtl_funcs_to_symbols.
    """
    # The ELF file is loaded first, as in the original ordering, so ELF errors surface first.
    elf_info = ElfInfo(elf_file)

    collected = []  # type: List[RtlFunction]
    for rtl_path in rtl_list:
        # The RTL path is passed as the translation unit name as well.
        load_rtl_file(rtl_path, rtl_path, collected)

    return match_rtl_funcs_to_symbols(collected, elf_info)
330  
331  
def list_refs_from_to_sections(refs, from_sections, to_sections):  # type: (List[Reference], List[str], List[str]) -> int
    """
    Print every reference whose source and target sections pass the given filters.

    An empty from_sections or to_sections list disables that filter.
    Returns the number of references printed.
    """
    count = 0
    for ref in refs:
        if from_sections and ref.from_sym.section not in from_sections:
            continue
        if to_sections and ref.to_sym.section not in to_sections:
            continue
        print(str(ref))
        count += 1
    return count
340  
341  
def find_files_recursive(root_path, ext):  # type: (str, str) -> Generator[str, None, None]
    """Yield the path of every file under root_path whose name ends with ext."""
    for dirpath, _dirnames, filenames in os.walk(root_path):
        for name in filenames:
            if not name.endswith(ext):
                continue
            yield os.path.join(dirpath, name)
348  
349  
def main():
    """
    Command line entry point.

    Reads the RTL file names from --rtl-list (one per line), or finds the
    ".expand" files under --rtl-dir, then matches them against --elf-file.
    The "find-refs" action prints the references that pass the section filters
    and, with --exit-code, exits with status 1 if any were printed. The
    "all-refs" action prints every reference.

    Raises RuntimeError when neither --rtl-list nor --rtl-dir is given, or when
    no RTL files are found.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--rtl-list",
        help="File with the list of RTL files",
        type=argparse.FileType("r"),
    )
    parser.add_argument(
        "--rtl-dir", help="Directory where to look for RTL files, recursively"
    )
    parser.add_argument(
        "--elf-file",
        required=True,
        help="Program ELF file",
        type=argparse.FileType("rb"),
    )
    action_sub = parser.add_subparsers(dest="action")
    find_refs_parser = action_sub.add_parser(
        "find-refs",
        # Trailing space matters: adjacent string literals are joined verbatim.
        help="List the references coming from a given list of source sections "
             "to a given list of target sections.",
    )
    find_refs_parser.add_argument(
        "--from-sections", help="comma-separated list of source sections"
    )
    find_refs_parser.add_argument(
        "--to-sections", help="comma-separated list of target sections"
    )
    find_refs_parser.add_argument(
        "--exit-code",
        action="store_true",
        help="If set, exits with non-zero code when any references found",
    )
    action_sub.add_parser(
        "all-refs",
        help="Print the list of all references",
    )

    args = parser.parse_args()
    if args.rtl_list:
        # argparse.FileType has already opened the file, so args.rtl_list is a
        # file object and must not be passed to open() again.
        with args.rtl_list as rtl_list_file:
            stripped_lines = (line.strip() for line in rtl_list_file)
            # Skip blank lines; an empty string is not a usable file name.
            rtl_list = [path for path in stripped_lines if path]
    else:
        if not args.rtl_dir:
            raise RuntimeError("Either --rtl-list or --rtl-dir must be specified")
        rtl_list = list(find_files_recursive(args.rtl_dir, ".expand"))

    if not rtl_list:
        raise RuntimeError("No RTL files specified")

    # Close the ELF file once the symbols and references have been extracted.
    with args.elf_file as elf_file:
        _, refs = get_symbols_and_refs(rtl_list, elf_file)

    if args.action == "find-refs":
        from_sections = args.from_sections.split(",") if args.from_sections else []
        to_sections = args.to_sections.split(",") if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == "all-refs":
        for r in refs:
            print(str(r))
415  
416  
# Run the command line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()