haystack/.github/utils/docstrings_checksum.py
Stefano Fiorucci c18f81283c
chore: fix deepset_sync.py for pylint + general linting improvements (#9558)
* chore: fix deepset_sync.py for pylint

* check .github with ruff

* fix

* Update .github/utils/pyproject_to_requirements.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

---------

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
2025-06-27 07:54:22 +00:00

52 lines
1.6 KiB
Python

import ast
import hashlib
from pathlib import Path
from typing import Iterator
def docstrings_checksum(python_files: Iterator[Path]) -> str:
    """
    Calculate the checksum of the docstrings in the given Python files.

    Each file is read as UTF-8 (the default encoding of Python source code), parsed with `ast`,
    and the docstrings of its module, classes, functions and async functions are collected.
    Empty or missing docstrings are ignored.

    :param python_files: Paths of the Python source files to inspect.
    :returns: Hexadecimal MD5 digest of the string form of the sorted list of collected docstrings.
    :raises SyntaxError: If a file does not contain valid Python source (raised by `ast.parse`).
    :raises UnicodeDecodeError: If a file is not valid UTF-8.
    """
    # Only these node types can carry a docstring; ast.get_docstring() raises on anything else.
    docstring_owners = (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)

    docstrings = []
    for path in python_files:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's preferred encoding of the machine running the script.
        tree = ast.parse(path.read_text(encoding="utf-8"))
        for node in ast.walk(tree):
            if not isinstance(node, docstring_owners):
                continue
            docstring = ast.get_docstring(node)
            if docstring:
                docstrings.append(docstring)

    # Sort them to be safe, since ast.walk() returns
    # nodes in no specified order.
    # See https://docs.python.org/3/library/ast.html#ast.walk
    docstrings.sort()

    # MD5 is used here as a checksum of the docstrings' content.
    return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()
if __name__ == "__main__":
    import argparse
    import itertools

    # Command-line entry point: print the docstrings checksum of the
    # Python files found under the folder given with --root.
    cli = argparse.ArgumentParser()
    cli.add_argument("--root", help="Haystack root folder", required=True, type=Path)
    root: Path = cli.parse_args().root.absolute()

    # Chain the Haystack and rest_api python files into a single iterator
    sources = itertools.chain(
        root.glob("haystack/**/*.py"),
        root.glob("rest_api/**/*.py"),
    )

    print(docstrings_checksum(sources))