/ .github / utils / docstrings_checksum.py
docstrings_checksum.py
 1  import ast
 2  import hashlib
 3  from collections.abc import Iterator
 4  from pathlib import Path
 5  
 6  
 7  def docstrings_checksum(python_files: Iterator[Path]) -> str:
 8      """
 9      Calculate the checksum of the docstrings in the given Python files.
10      """
11      files_content = (f.read_text() for f in python_files)
12      trees = (ast.parse(c) for c in files_content)
13  
14      # Get all docstrings from async functions, functions,
15      # classes and modules definitions
16      docstrings = []
17      for tree in trees:
18          for node in ast.walk(tree):
19              if not isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)):
20                  # Skip all node types that can't have docstrings to prevent failures
21                  continue
22              docstring = ast.get_docstring(node)
23              if docstring:
24                  docstrings.append(docstring)
25  
26      # Sort them to be safe, since ast.walk() returns
27      # nodes in no specified order.
28      # See https://docs.python.org/3/library/ast.html#ast.walk
29      docstrings.sort()
30  
31      return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()
32  
33  
if __name__ == "__main__":
    import argparse

    # Command-line entry point: the only input is the repository root folder.
    cli = argparse.ArgumentParser()
    cli.add_argument("--root", type=Path, required=True, help="Haystack root folder")
    options = cli.parse_args()

    # Collect every Python file below the "haystack" directory of the
    # (absolute) root, then print the checksum of their docstrings.
    base_dir: Path = options.root.absolute()
    source_files = base_dir.glob("haystack/**/*.py")

    print(docstrings_checksum(source_files))