From de7099e5605fb89d8bca4623dc339bdacc7e18bb Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Fri, 29 Nov 2024 15:00:59 +0100 Subject: [PATCH] ci: add job to check imports (#8594) * try checking imports * clarify error message * better fmt * do not show complete list of successfully imported packages * refinements * relnote * add missing forward references * better function name * linting * fix linting * Update .github/utils/check_imports.py Co-authored-by: Madeesh Kannan --------- Co-authored-by: Madeesh Kannan --- .github/utils/check_imports.py | 74 +++++++++++++++++++ .github/workflows/tests.yml | 47 ++++++++++++ .../components/connectors/openapi_service.py | 4 +- pyproject.toml | 4 - .../notes/check-imports-8a2aa18c73e5c492.yaml | 6 ++ test/test_imports.py | 67 ----------------- 6 files changed, 129 insertions(+), 73 deletions(-) create mode 100644 .github/utils/check_imports.py create mode 100644 releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml delete mode 100644 test/test_imports.py diff --git a/.github/utils/check_imports.py b/.github/utils/check_imports.py new file mode 100644 index 000000000..6867199a0 --- /dev/null +++ b/.github/utils/check_imports.py @@ -0,0 +1,74 @@ +import os +import sys +import importlib +import traceback +from haystack import logging # pylint: disable=unused-import # this is needed to avoid circular imports + +def validate_package_imports(directory: str): + """ + Recursively search for directories with __init__.py and import them. + """ + imported = [] + failed = [] + + sys.path.insert(0, directory) + + for root, _, files in os.walk(directory): + # skip directories without __init__.py + if not '__init__.py' in files: + continue + + init_path = os.path.join(root, '__init__.py') + + # skip haystack/__init__.py to avoid circular imports + if init_path.endswith(f'{directory}/__init__.py'): + continue + + # convert filesystem path to Python module name + relative_path = os.path.relpath(root, directory) + module_name = relative_path.replace(os.path.sep, '.') + if module_name == '.': + module_name = os.path.basename(directory) + + try: + importlib.import_module(module_name) + imported.append(module_name) + except: + failed.append({ + 'module': module_name, + 'traceback': traceback.format_exc() + }) + + return imported, failed + + +def main(): + """ + This script checks that all Haystack packages can be imported successfully. + This can detect several issues. + One example is forgetting to use a forward reference for a type hint coming + from a lazy import. + """ + print("Checking imports from Haystack packages...") + imported, failed = validate_package_imports(directory="haystack") + + if not imported: + print("\nNO PACKAGES WERE IMPORTED") + sys.exit(1) + + print(f"\nSUCCESSFULLY IMPORTED {len(imported)} PACKAGES") + + if failed: + print(f"\nFAILED TO IMPORT {len(failed)} PACKAGES:") + for fail in failed: + print(f" - {fail['module']}") + + print("\nERRORS:") + for fail in failed: + print(f" - {fail['module']}\n") + print(f" {fail['traceback']}\n\n") + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5e2af839a..26bc30066 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -85,6 +85,53 @@ jobs: - "branch:${{ github.ref_name }}" - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + check-imports: + needs: format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "${{ env.PYTHON_VERSION }}" + + - name: Install Hatch + run: pip install hatch==${{ env.HATCH_VERSION }} + + - name: Check imports + run: hatch run python .github/utils/check_imports.py + + - name: Calculate alert data + id: calculator + shell: bash + if: (success() || failure()) && github.ref_name == 'main' + run: | + if [ "${{ job.status }}" = "success" ]; then + echo "alert_type=success" >> "$GITHUB_OUTPUT"; + else + echo "alert_type=error" >> "$GITHUB_OUTPUT"; + fi + + - name: Send event to Datadog + if: (success() || failure()) && github.ref_name == 'main' + uses: masci/datadog@v1 + with: + api-key: ${{ secrets.CORE_DATADOG_API_KEY }} + api-url: https://api.datadoghq.eu + events: | + - title: "${{ github.workflow }} workflow" + text: "Job ${{ github.job }} in branch ${{ github.ref_name }}" + alert_type: "${{ steps.calculator.outputs.alert_type }}" + source_type_name: "Github" + host: ${{ github.repository_owner }} + tags: + - "project:${{ github.repository }}" + - "job:${{ github.job }}" + - "run_id:${{ github.run_id }}" + - "workflow:${{ github.workflow }}" + - "branch:${{ github.ref_name }}" + - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" + unit-tests: name: Unit / ${{ matrix.os }} needs: format diff --git a/haystack/components/connectors/openapi_service.py b/haystack/components/connectors/openapi_service.py index ea89dde54..68116e308 100644 --- a/haystack/components/connectors/openapi_service.py +++ b/haystack/components/connectors/openapi_service.py @@ -181,7 +181,7 @@ class OpenAPIServiceConnector: ) return function_payloads - def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[Union[dict, str]] = None): + def _authenticate_service(self, openapi_service: "OpenAPI", credentials: Optional[Union[dict, str]] = None): """ Authentication with an OpenAPI service. @@ -236,7 +236,7 @@ class OpenAPIServiceConnector: f"for it. Check the service configuration and credentials." ) - def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: Dict[str, Any]) -> Any: + def _invoke_method(self, openapi_service: "OpenAPI", method_invocation_descriptor: Dict[str, Any]) -> Any: """ Invokes the specified method on the OpenAPI service. diff --git a/pyproject.toml b/pyproject.toml index d8432f225..172147c61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,10 +123,6 @@ extra-dependencies = [ # Structured logging "structlog", - # Looking for missing imports - "isort", - "pyproject-parser", - # Test "pytest", "pytest-bdd", diff --git a/releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml b/releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml new file mode 100644 index 000000000..2d1814e89 --- /dev/null +++ b/releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml @@ -0,0 +1,6 @@ +--- +enhancements: + - | + Add a testing job to check that all packages can be imported successfully. + This should help detecting several issues, such as forgetting to use a + forward reference for a type hint coming from a lazy import. diff --git a/test/test_imports.py b/test/test_imports.py deleted file mode 100644 index fc9da51dd..000000000 --- a/test/test_imports.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-FileCopyrightText: 2022-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 -import ast -import os -from _ast import Import, ImportFrom -from pathlib import Path - -import isort -import toml -from pyproject_parser import PyProject - -# Some libraries have different names in the import and in the dependency -# If below test fails due to that, add the library name to the dictionary -LIBRARY_NAMES_TO_MODULE_NAMES = {"python-dateutil": "dateutil"} - -# Some standard libraries are not detected by isort. If below test fails due to that, add the library name to the set. -ADDITIONAL_STD_LIBS = {"yaml"} - - -def test_for_missing_dependencies() -> None: - # We implement this manual check because - # - All tools out there are too powerful because they find all the imports in the haystack package - # - if we import all modules to check the imports we don't find issues with direct dependencies which are also - # sub-dependencies of other dependencies - - #### Collect imports - top_level_imports = set() - for path in Path("haystack").glob("**/*.py"): - content = path.read_text(encoding="utf-8") - tree = ast.parse(content) - for item in tree.body: - if isinstance(item, Import): - module = item.names[0].name - elif isinstance(item, ImportFrom) and item.level == 0: # level > 1 are relative imports - module = item.module - else: - # we only care about imports - break - - top_level_imports.add(module.split(".")[0]) - - third_party_modules = { - module - for module in top_level_imports - if isort.place_module(module) == "THIRDPARTY" and module not in ADDITIONAL_STD_LIBS - } - - #### Load specified dependencies - parsed = toml.load("pyproject.toml") - # Pyproject complains about our pyproject.toml file, so we need to parse only the dependencies - # We still need `PyProject` to parse the dependencies (think of markers and stuff) - only_dependencies = {"project": {"name": "test", "dependencies": parsed["project"]["dependencies"]}} - project_dependencies = PyProject.project_table_parser.parse(only_dependencies["project"], set_defaults=True)[ - "dependencies" - ] - - project_dependency_modules = set() - for dep in project_dependencies: - if dep.name in LIBRARY_NAMES_TO_MODULE_NAMES: - project_dependency_modules.add(LIBRARY_NAMES_TO_MODULE_NAMES[dep.name]) - - project_dependency_modules.add(dep.name.replace("-", "_")) - - #### And now finally; the check - for module in third_party_modules: - assert module in project_dependency_modules, f"Module {module} is not in the dependencies"