diff --git a/.github/utils/docstrings_checksum.py b/.github/utils/docstrings_checksum.py
new file mode 100644
index 000000000..c8bf9ab4e
--- /dev/null
+++ b/.github/utils/docstrings_checksum.py
@@ -0,0 +1,55 @@
+import ast
+import hashlib
+import itertools
+from pathlib import Path
+from typing import Iterable, List
+
+# Node types that can carry a docstring; ast.get_docstring() raises TypeError on any other.
+_DOCSTRING_NODES = (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)
+
+
+def docstrings_checksum(python_files: Iterable[Path]) -> str:
+    """
+    Compute a checksum of all the docstrings found in the given Python files.
+
+    Docstrings are collected from modules, classes, functions and async
+    functions. The result does not depend on the order of the files nor on
+    the order of the definitions inside them.
+
+    :param python_files: Paths of the Python source files to inspect.
+    :return: Hex digest of the MD5 hash of the sorted list of docstrings.
+    :raises SyntaxError: If one of the files is not valid Python.
+    """
+    docstrings: List[str] = []
+    for path in python_files:
+        # Hand the raw bytes to the parser so the source is decoded like the
+        # interpreter would (UTF-8 by default, honouring a BOM or coding
+        # declaration) instead of depending on the locale encoding.
+        # The file name makes syntax errors point to the offending file.
+        tree = ast.parse(path.read_bytes(), filename=str(path))
+        for node in ast.walk(tree):
+            if not isinstance(node, _DOCSTRING_NODES):
+                # Skip all node types that can't have docstrings to prevent failures
+                continue
+            docstring = ast.get_docstring(node)
+            if docstring:
+                docstrings.append(docstring)
+
+    # Sort them to be safe, since ast.walk() returns
+    # nodes in no specified order.
+    # See https://docs.python.org/3/library/ast.html#ast.walk
+    docstrings.sort()
+
+    # MD5 is only used to detect changes, not for security purposes.
+    return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()
+
+
+if __name__ == "__main__":
+    # Get all Haystack and rest_api python files
+    root = Path(__file__).parent.parent.parent
+    haystack_files = root.glob("haystack/**/*.py")
+    rest_api_files = root.glob("rest_api/**/*.py")
+    python_files = itertools.chain(haystack_files, rest_api_files)
+
+    md5 = docstrings_checksum(python_files)
+    print(md5)
diff --git a/.github/workflows/docstring-labeler.yml b/.github/workflows/docstring-labeler.yml
new file mode 100644
index 000000000..983c15493
--- /dev/null
+++ b/.github/workflows/docstring-labeler.yml
@@ -0,0 +1,50 @@
+name: Add label on docstrings edit
+
+on:
+  pull_request:
+    paths:
+      - "haystack/**/*.py"
+      - "rest_api/**/*.py"
+
+permissions:
+  pull-requests: write
+
+jobs:
+  label:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout base commit
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Get docstrings
+        id: base-docstrings
+        run: |
+          CHECKSUM=$(python .github/utils/docstrings_checksum.py)
+          echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT"
+
+      - name: Checkout HEAD commit
+        uses: actions/checkout@v3
+
+      - name: Get docstrings
+        id: head-docstrings
+        run: |
+          CHECKSUM=$(python .github/utils/docstrings_checksum.py)
+          echo "checksum=$CHECKSUM" >> "$GITHUB_OUTPUT"
+
+      - name: Check if we should label
+        id: run-check
+        run: echo "should_run=${{ steps.base-docstrings.outputs.checksum != steps.head-docstrings.outputs.checksum }}" >> "$GITHUB_OUTPUT"
+
+      - name: Add label
+        if: ${{ steps.run-check.outputs.should_run == 'true' }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh pr edit ${{ github.event.pull_request.html_url }} --add-label "type:documentation"