/ .github / workflows / check_api_ref.yml
check_api_ref.yml
 1  name: Check API reference changes
 2  
 3  on:
 4    pull_request:  # runs only for pull requests that touch at least one of the paths below
 5      paths:
 6        - "haystack/**/*.py"  # Python sources whose docstring checksums are compared by the detection step
 7        - "pydoc/*.yml"  # pydoc configs; the detection step treats any change here as requiring a rebuild
 8  
 9  jobs:
10    test-api-reference-build:
11      runs-on: ubuntu-slim
12      steps:
13        - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
14          with:
15            fetch-depth: 0  # full history, so the detection step can resolve HEAD^1 and add a worktree for it
16  
17        - name: Set up Python
18          uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
19          with:
20            python-version: "3.13"  # quoted so YAML keeps the version a string instead of parsing a float
21  
22        - name: Detect API reference changes
23          id: changed
24          shell: python
25          run: |
26            # Decide whether the API reference must be rebuilt and export the answer as the needs_check output.
27            import os
28            import subprocess
29            import sys
30            from pathlib import Path
31  
32            sys.path.insert(0, ".github/utils")
33            from docstrings_checksum import docstrings_checksum
34  
35            def git(*args):
36                """Run git with args; return (stripped stdout, exit code), echoing stderr when it fails."""
37                result = subprocess.run(["git", *args], capture_output=True, text=True)
38                if result.returncode != 0:
39                    print(f"git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
40                return result.stdout.strip(), result.returncode
41  
42            # actions/checkout checks out the PR merge commit, so HEAD^1 is expected to be the base branch tip.
43            base_sha, sha_rc = git("rev-parse", "HEAD^1")
44            diff_output, diff_rc = git("diff", "--name-only", f"{base_sha}...HEAD")
45            changed_files = set(diff_output.splitlines())
46  
47            # Fail closed: when the change set cannot be determined, run the check instead of silently skipping
48            # it. A changed pydoc config always forces a rebuild as well.
49            needs_check = sha_rc != 0 or diff_rc != 0 or any(
50                f.startswith("pydoc/") and f.endswith(".yml") for f in changed_files
51            )
52  
53            # If Python files changed, compare docstring checksums
54            if not needs_check and any(f.startswith("haystack/") and f.endswith(".py") for f in changed_files):
55                base_worktree = os.path.join(os.environ["RUNNER_TEMP"], "base")
56                _, rc = git("worktree", "add", base_worktree, base_sha)
57                pr_checksum = docstrings_checksum(Path(".").glob("haystack/**/*.py"))
58                # An empty base checksum is not expected to match, so a failed worktree also triggers the check.
59                base_checksum = docstrings_checksum(Path(base_worktree).glob("haystack/**/*.py")) if rc == 0 else ""
60                needs_check = pr_checksum != base_checksum
61  
62            print(f"API reference check needed: {needs_check}")
63            with open(os.environ["GITHUB_OUTPUT"], "a") as f:
64                f.write(f"needs_check={str(needs_check).lower()}\n")
65  
66        - name: Install Hatch
67          if: steps.changed.outputs.needs_check == 'true'  # needs_check is the output written by the "changed" step
68          run: pip install hatch  # provides the `hatch run docs` command used by the next step
69  
70        - name: Generate API references
71          if: steps.changed.outputs.needs_check == 'true'
72          run: hatch run docs  # presumably writes into tmp_api_reference (the checker's working directory) - confirm
73  
74        - name: Set up Node.js
75          if: steps.changed.outputs.needs_check == 'true'
76          uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
77          with:
78            node-version: "22"  # quoted so YAML keeps the version a string
79  
80        - name: Run Docusaurus md/mdx checker
81          if: steps.changed.outputs.needs_check == 'true'
82          working-directory: tmp_api_reference
83          run: |
84            # Workaround: the rarely updated docusaurus-mdx-checker depends on katex, whose ESM build is sometimes
85            # published with an unresolved __VERSION__ placeholder and then throws a ReferenceError on import.
86            # Node 22+ picks ESM when it is available, so we request the "require" (CommonJS) condition instead,
87            # which resolves to the working katex build; the checker and its dependencies provide CJS builds.
88            export NODE_OPTIONS="--conditions=require"
89            if ! npx docusaurus-mdx-checker -v; then
90              echo ""
91              echo "For common MDX problems, see https://docusaurus.io/blog/preparing-your-site-for-docusaurus-v3#common-mdx-problems"
92              exit 1
93            fi