test: test for missing dependencies (#7278)

* tests: import test for missing libraries

* build: add missing dependencies

* refactor: use glob instead of tree walk

* test: extract constants + more documentation
This commit is contained in:
Tobias Wochinger 2024-03-05 12:14:10 +01:00 committed by GitHub
parent 38a80b0235
commit 655d4a1a8d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 75 additions and 1 deletions

View File

@ -4,12 +4,15 @@ from datetime import datetime
from typing import List from typing import List
import pandas as pd import pandas as pd
import pytest
from haystack.dataclasses import Document from haystack.dataclasses import Document
from haystack.document_stores.errors import DuplicateDocumentError from haystack.document_stores.errors import DuplicateDocumentError
from haystack.document_stores.types import DocumentStore, DuplicatePolicy from haystack.document_stores.types import DocumentStore, DuplicatePolicy
from haystack.errors import FilterError from haystack.errors import FilterError
from haystack.lazy_imports import LazyImport
with LazyImport("Run 'pip install pytest'") as pytest_import:
import pytest
def _random_embeddings(n): def _random_embeddings(n):

View File

@ -59,6 +59,9 @@ dependencies = [
"networkx", # Pipeline graphs "networkx", # Pipeline graphs
"typing_extensions>=4.7", # typing support for Python 3.8 "typing_extensions>=4.7", # typing support for Python 3.8
"boilerpy3", # Fulltext extraction from HTML pages "boilerpy3", # Fulltext extraction from HTML pages
"requests",
"numpy",
"python-dateutil",
] ]
[tool.hatch.envs.default] [tool.hatch.envs.default]
@ -127,6 +130,10 @@ extra-dependencies = [
# Structured logging # Structured logging
"structlog", "structlog",
# Looking for missing imports
"isort",
"pyproject-parser",
] ]
[tool.hatch.envs.test.scripts] [tool.hatch.envs.test.scripts]

64
test/test_imports.py Normal file
View File

@ -0,0 +1,64 @@
import ast
import os
from _ast import Import, ImportFrom
from pathlib import Path
import isort
import toml
from pyproject_parser import PyProject
# Some libraries have different names in the import and in the dependency
# If below test fails due to that, add the library name to the dictionary
LIBRARY_NAMES_TO_MODULE_NAMES = {"python-dateutil": "dateutil"}
# Some standard libraries are not detected by isort. If below test fails due to that, add the library name to the set.
ADDITIONAL_STD_LIBS = {"yaml"}
def test_for_missing_dependencies() -> None:
# We implement this manual check because
# - All tools out there are too powerful because they find all the imports in the haystack package
# - if we import all modules to check the imports we don't find issues with direct dependencies which are also
# sub-dependencies of other dependencies
#### Collect imports
top_level_imports = set()
for path in Path("haystack").glob("**/*.py"):
content = path.read_text(encoding="utf-8")
tree = ast.parse(content)
for item in tree.body:
if isinstance(item, Import):
module = item.names[0].name
elif isinstance(item, ImportFrom) and item.level == 0: # level > 1 are relative imports
module = item.module
else:
# we only care about imports
break
top_level_imports.add(module.split(".")[0])
third_party_modules = {
module
for module in top_level_imports
if isort.place_module(module) == "THIRDPARTY" and module not in ADDITIONAL_STD_LIBS
}
#### Load specified dependencies
parsed = toml.load("pyproject.toml")
# Pyproject complains about our pyproject.toml file, so we need to parse only the dependencies
# We still need `PyProject` to parse the dependencies (think of markers and stuff)
only_dependencies = {"project": {"name": "test", "dependencies": parsed["project"]["dependencies"]}}
project_dependencies = PyProject.project_table_parser.parse(only_dependencies["project"], set_defaults=True)[
"dependencies"
]
project_dependency_modules = set()
for dep in project_dependencies:
if dep.name in LIBRARY_NAMES_TO_MODULE_NAMES:
project_dependency_modules.add(LIBRARY_NAMES_TO_MODULE_NAMES[dep.name])
project_dependency_modules.add(dep.name.replace("-", "_"))
#### And now finally; the check
for module in third_party_modules:
assert module in project_dependency_modules, f"Module {module} is not in the dependencies"