docstrings_checksum.py
"""Compute a checksum over the docstrings found in a set of Python files."""

import ast
import hashlib
from collections.abc import Iterator
from pathlib import Path


def docstrings_checksum(python_files: Iterator[Path]) -> str:
    """
    Calculate the checksum of the docstrings in the given Python files.

    Each file is read as UTF-8 (the default encoding of Python source code),
    parsed with `ast`, and the docstrings of every module, class, function and
    async function are collected. The collected docstrings are sorted before
    hashing so the result does not depend on the order in which files or AST
    nodes are visited.

    :param python_files: Paths of the Python source files to inspect.
    :returns: Hexadecimal MD5 digest of the string representation of the
        sorted list of docstrings.
    :raises SyntaxError: If a file can't be parsed as Python source.
    :raises UnicodeDecodeError: If a file is not valid UTF-8.
    """
    # The only node types ast.get_docstring() accepts; anything else makes it raise.
    docstring_owners = (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)

    docstrings = []
    for path in python_files:
        # Decode explicitly as UTF-8 so the checksum doesn't depend on the
        # platform's locale encoding.
        source = path.read_text(encoding="utf-8")
        # Pass the filename so that a SyntaxError points at the offending file.
        tree = ast.parse(source, filename=str(path))
        for node in ast.walk(tree):
            if not isinstance(node, docstring_owners):
                continue
            docstring = ast.get_docstring(node)
            # Missing (None) and empty docstrings don't contribute to the checksum.
            if docstring:
                docstrings.append(docstring)

    # Sort them to be safe, since ast.walk() returns
    # nodes in no specified order.
    # See https://docs.python.org/3/library/ast.html#ast.walk
    docstrings.sort()

    return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()


def main() -> None:
    """Parse the command line and print the docstrings checksum of the Haystack sources."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--root", help="Haystack root folder", required=True, type=Path)
    args = parser.parse_args()

    # Get all Python files below the `haystack` folder of the given root
    root: Path = args.root.absolute()
    haystack_files = root.glob("haystack/**/*.py")

    md5 = docstrings_checksum(haystack_files)
    print(md5)


if __name__ == "__main__":
    main()