mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-26 06:28:33 +00:00
ci: add job to check imports (#8594)
* try checking imports * clarify error message * better fmt * do not show complete list of successfully imported packages * refinements * relnote * add missing forward references * better function name * linting * fix linting * Update .github/utils/check_imports.py Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com> --------- Co-authored-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
This commit is contained in:
parent
163c06f3d6
commit
de7099e560
74
.github/utils/check_imports.py
vendored
Normal file
74
.github/utils/check_imports.py
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
import os
|
||||
import sys
|
||||
import importlib
|
||||
import traceback
|
||||
from haystack import logging # pylint: disable=unused-import # this is needed to avoid circular imports
|
||||
|
||||
def validate_package_imports(directory: str):
|
||||
"""
|
||||
Recursively search for directories with __init__.py and import them.
|
||||
"""
|
||||
imported = []
|
||||
failed = []
|
||||
|
||||
sys.path.insert(0, directory)
|
||||
|
||||
for root, _, files in os.walk(directory):
|
||||
# skip directories without __init__.py
|
||||
if not '__init__.py' in files:
|
||||
continue
|
||||
|
||||
init_path = os.path.join(root, '__init__.py')
|
||||
|
||||
# skip haystack/__init__.py to avoid circular imports
|
||||
if init_path.endswith(f'{directory}/__init__.py'):
|
||||
continue
|
||||
|
||||
# convert filesystem path to Python module name
|
||||
relative_path = os.path.relpath(root, directory)
|
||||
module_name = relative_path.replace(os.path.sep, '.')
|
||||
if module_name == '.':
|
||||
module_name = os.path.basename(directory)
|
||||
|
||||
try:
|
||||
importlib.import_module(module_name)
|
||||
imported.append(module_name)
|
||||
except:
|
||||
failed.append({
|
||||
'module': module_name,
|
||||
'traceback': traceback.format_exc()
|
||||
})
|
||||
|
||||
return imported, failed
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
This script checks that all Haystack packages can be imported successfully.
|
||||
This can detect several issues.
|
||||
One example is forgetting to use a forward reference for a type hint coming
|
||||
from a lazy import.
|
||||
"""
|
||||
print("Checking imports from Haystack packages...")
|
||||
imported, failed = validate_package_imports(directory="haystack")
|
||||
|
||||
if not imported:
|
||||
print("\nNO PACKAGES WERE IMPORTED")
|
||||
sys.exit(1)
|
||||
|
||||
print(f"\nSUCCESSFULLY IMPORTED {len(imported)} PACKAGES")
|
||||
|
||||
if failed:
|
||||
print(f"\nFAILED TO IMPORT {len(failed)} PACKAGES:")
|
||||
for fail in failed:
|
||||
print(f" - {fail['module']}")
|
||||
|
||||
print("\nERRORS:")
|
||||
for fail in failed:
|
||||
print(f" - {fail['module']}\n")
|
||||
print(f" {fail['traceback']}\n\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
47
.github/workflows/tests.yml
vendored
47
.github/workflows/tests.yml
vendored
@ -85,6 +85,53 @@ jobs:
|
||||
- "branch:${{ github.ref_name }}"
|
||||
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
|
||||
check-imports:
|
||||
needs: format
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "${{ env.PYTHON_VERSION }}"
|
||||
|
||||
- name: Install Hatch
|
||||
run: pip install hatch==${{ env.HATCH_VERSION }}
|
||||
|
||||
- name: Check imports
|
||||
run: hatch run python .github/utils/check_imports.py
|
||||
|
||||
- name: Calculate alert data
|
||||
id: calculator
|
||||
shell: bash
|
||||
if: (success() || failure()) && github.ref_name == 'main'
|
||||
run: |
|
||||
if [ "${{ job.status }}" = "success" ]; then
|
||||
echo "alert_type=success" >> "$GITHUB_OUTPUT";
|
||||
else
|
||||
echo "alert_type=error" >> "$GITHUB_OUTPUT";
|
||||
fi
|
||||
|
||||
- name: Send event to Datadog
|
||||
if: (success() || failure()) && github.ref_name == 'main'
|
||||
uses: masci/datadog@v1
|
||||
with:
|
||||
api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
|
||||
api-url: https://api.datadoghq.eu
|
||||
events: |
|
||||
- title: "${{ github.workflow }} workflow"
|
||||
text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
|
||||
alert_type: "${{ steps.calculator.outputs.alert_type }}"
|
||||
source_type_name: "Github"
|
||||
host: ${{ github.repository_owner }}
|
||||
tags:
|
||||
- "project:${{ github.repository }}"
|
||||
- "job:${{ github.job }}"
|
||||
- "run_id:${{ github.run_id }}"
|
||||
- "workflow:${{ github.workflow }}"
|
||||
- "branch:${{ github.ref_name }}"
|
||||
- "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
|
||||
unit-tests:
|
||||
name: Unit / ${{ matrix.os }}
|
||||
needs: format
|
||||
|
||||
@ -181,7 +181,7 @@ class OpenAPIServiceConnector:
|
||||
)
|
||||
return function_payloads
|
||||
|
||||
def _authenticate_service(self, openapi_service: OpenAPI, credentials: Optional[Union[dict, str]] = None):
|
||||
def _authenticate_service(self, openapi_service: "OpenAPI", credentials: Optional[Union[dict, str]] = None):
|
||||
"""
|
||||
Authentication with an OpenAPI service.
|
||||
|
||||
@ -236,7 +236,7 @@ class OpenAPIServiceConnector:
|
||||
f"for it. Check the service configuration and credentials."
|
||||
)
|
||||
|
||||
def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor: Dict[str, Any]) -> Any:
|
||||
def _invoke_method(self, openapi_service: "OpenAPI", method_invocation_descriptor: Dict[str, Any]) -> Any:
|
||||
"""
|
||||
Invokes the specified method on the OpenAPI service.
|
||||
|
||||
|
||||
@ -123,10 +123,6 @@ extra-dependencies = [
|
||||
# Structured logging
|
||||
"structlog",
|
||||
|
||||
# Looking for missing imports
|
||||
"isort",
|
||||
"pyproject-parser",
|
||||
|
||||
# Test
|
||||
"pytest",
|
||||
"pytest-bdd",
|
||||
|
||||
6
releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml
Normal file
6
releasenotes/notes/check-imports-8a2aa18c73e5c492.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
enhancements:
|
||||
- |
|
||||
Add a testing job to check that all packages can be imported successfully.
|
||||
This should help detecting several issues, such as forgetting to use a
|
||||
forward reference for a type hint coming from a lazy import.
|
||||
@ -1,67 +0,0 @@
|
||||
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
import ast
|
||||
import os
|
||||
from _ast import Import, ImportFrom
|
||||
from pathlib import Path
|
||||
|
||||
import isort
|
||||
import toml
|
||||
from pyproject_parser import PyProject
|
||||
|
||||
# Some libraries have different names in the import and in the dependency
|
||||
# If below test fails due to that, add the library name to the dictionary
|
||||
LIBRARY_NAMES_TO_MODULE_NAMES = {"python-dateutil": "dateutil"}
|
||||
|
||||
# Some standard libraries are not detected by isort. If below test fails due to that, add the library name to the set.
|
||||
ADDITIONAL_STD_LIBS = {"yaml"}
|
||||
|
||||
|
||||
def test_for_missing_dependencies() -> None:
|
||||
# We implement this manual check because
|
||||
# - All tools out there are too powerful because they find all the imports in the haystack package
|
||||
# - if we import all modules to check the imports we don't find issues with direct dependencies which are also
|
||||
# sub-dependencies of other dependencies
|
||||
|
||||
#### Collect imports
|
||||
top_level_imports = set()
|
||||
for path in Path("haystack").glob("**/*.py"):
|
||||
content = path.read_text(encoding="utf-8")
|
||||
tree = ast.parse(content)
|
||||
for item in tree.body:
|
||||
if isinstance(item, Import):
|
||||
module = item.names[0].name
|
||||
elif isinstance(item, ImportFrom) and item.level == 0: # level > 1 are relative imports
|
||||
module = item.module
|
||||
else:
|
||||
# we only care about imports
|
||||
break
|
||||
|
||||
top_level_imports.add(module.split(".")[0])
|
||||
|
||||
third_party_modules = {
|
||||
module
|
||||
for module in top_level_imports
|
||||
if isort.place_module(module) == "THIRDPARTY" and module not in ADDITIONAL_STD_LIBS
|
||||
}
|
||||
|
||||
#### Load specified dependencies
|
||||
parsed = toml.load("pyproject.toml")
|
||||
# Pyproject complains about our pyproject.toml file, so we need to parse only the dependencies
|
||||
# We still need `PyProject` to parse the dependencies (think of markers and stuff)
|
||||
only_dependencies = {"project": {"name": "test", "dependencies": parsed["project"]["dependencies"]}}
|
||||
project_dependencies = PyProject.project_table_parser.parse(only_dependencies["project"], set_defaults=True)[
|
||||
"dependencies"
|
||||
]
|
||||
|
||||
project_dependency_modules = set()
|
||||
for dep in project_dependencies:
|
||||
if dep.name in LIBRARY_NAMES_TO_MODULE_NAMES:
|
||||
project_dependency_modules.add(LIBRARY_NAMES_TO_MODULE_NAMES[dep.name])
|
||||
|
||||
project_dependency_modules.add(dep.name.replace("-", "_"))
|
||||
|
||||
#### And now finally; the check
|
||||
for module in third_party_modules:
|
||||
assert module in project_dependency_modules, f"Module {module} is not in the dependencies"
|
||||
Loading…
x
Reference in New Issue
Block a user