mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-02-01 04:23:16 +00:00
* Add BasePipeline.validate_config, BasePipeline.validate_yaml, and some new custom exception classes * Make error composition work properly * Clarify typing * Help mypy a bit more * Update Documentation & Code Style * Enable autogenerated docs for Milvus1 and 2 separately * Revert "Enable autogenerated docs for Milvus1 and 2 separately" This reverts commit 282be4a78a6e95862a9b4c924fc3dea5ca71e28d. * Update Documentation & Code Style * Re-enable 'additionalProperties: False' * Add pipeline.type to JSON Schema, was somehow forgotten * Disable additionalProperties on the pipeline properties too * Fix json-schemas for 1.1.0 and 1.2.0 (should not do it again in the future) * Cal super in PipelineValidationError * Improve _read_pipeline_config_from_yaml's error handling * Fix generate_json_schema.py to include document stores * Fix json schemas (retro-fix 1.1.0 again) * Improve custom errors printing, add link to docs * Add function in BaseComponent to list its subclasses in a module * Make some document stores base classes abstract * Add marker 'integration' in pytest flags * Slighly improve validation of pipelines at load * Adding tests for YAML loading and validation * Make custom_query Optional for validation issues * Fix bug in _read_pipeline_config_from_yaml * Improve error handling in BasePipeline and Pipeline and add DAG check * Move json schema generation into haystack/nodes/_json_schema.py (useful for tests) * Simplify errors slightly * Add some YAML validation tests * Remove load_from_config from BasePipeline, it was never used anyway * Improve tests * Include json-schemas in package * Fix conftest imports * Make BasePipeline abstract * Improve mocking by making the test independent from the YAML version * Add exportable_to_yaml decorator to forget about set_config on mock nodes * Fix mypy errors * Comment out one monkeypatch * Fix typing again * Improve error message for validation * Add required properties to pipelines * Fix YAML version for REST API YAMLs 
to 1.2.0 * Fix load_from_yaml call in load_from_deepset_cloud * fix HaystackError.__getattr__ * Add super().__init__()in most nodes and docstore, comment set_config * Remove type from REST API pipelines * Remove useless init from doc2answers * Call super in Seq3SeqGenerator * Typo in deepsetcloud.py * Fix rest api indexing error mismatch and mock version of JSON schema in all tests * Working on pipeline tests * Improve errors printing slightly * Add back test_pipeline.yaml * _json_schema.py supports different versions with identical schemas * Add type to 0.7 schema for backwards compatibility * Fix small bug in _json_schema.py * Try alternative to generate json schemas on the CI * Update Documentation & Code Style * Make linux CI match autoformat CI * Fix super-init-not-called * Accidentally committed file * Update Documentation & Code Style * fix test_summarizer_translation.py's import * Mock YAML in a few suites, split and simplify test_pipeline_debug_and_validation.py::test_invalid_run_args * Fix json schema for ray tests too * Update Documentation & Code Style * Reintroduce validation * Usa unstable version in tests and rest api * Make unstable support the latest versions * Update Documentation & Code Style * Remove needless fixture * Make type in pipeline optional in the strings validation * Fix schemas * Fix string validation for pipeline type * Improve validate_config_strings * Remove type from test p[ipelines * Update Documentation & Code Style * Fix test_pipeline * Removing more type from pipelines * Temporary CI patc * Fix issue with exportable_to_yaml never invoking the wrapped init * rm stray file * pipeline tests are green again * Linux CI now needs .[all] to generate the schema * Bugfixes, pipeline tests seems to be green * Typo in version after merge * Implement missing methods in Weaviate * Trying to avoid FAISS tests from running in the Milvus1 test suite * Fix some stray test paths and faiss index dumping * Fix pytest markers list * Temporarily 
disable cache to be able to see tests failures * Fix pyproject.toml syntax * Use only tmp_path * Fix preprocessor signature after merge * Fix faiss bug * Fix Ray test * Fix documentation issue by removing quotes from faiss type * Update Documentation & Code Style * use document properly in preprocessor tests * Update Documentation & Code Style * make preprocessor capable of handling documents * import document * Revert support for documents in preprocessor, do later * Fix bug in _json_schema.py that was breaking validation * re-enable cache * Update Documentation & Code Style * Simplify calling _json_schema.py from the CI * Remove redundant ABC inheritance * Ensure exportable_to_yaml works only on implementations * Rename subclass to class_ in Meta * Make run() and get_config() abstract in BasePipeline * Revert unintended change in preprocessor * Move outgoing_edges_input_node check inside try block * Rename VALID_CODE_GEN_INPUT_REGEX into VALID_INPUT_REGEX * Add check for a RecursionError on validate_config_strings * Address usages of _pipeline_config in data silo and elasticsearch * Rename _pipeline_config into _init_parameters * Fix pytest marker and remove unused imports * Remove most redundant ABCs * Rename _init_parameters into _component_configuration * Remove set_config and type from _component_configuration's dict * Remove last instances of set_config and replace with super().__init__() * Implement __init_subclass__ approach * Simplify checks on the existence of _component_configuration * Fix faiss issue * Dynamic generation of node schemas & weed out old schemas * Add debatable test * Add docstring to debatable test * Positive diff between schemas implemented * Improve diff printing * Rename REST API YAML files to trigger IDE validation * Fix typing issues * Fix more typing * Typo in YAML filename * Remove needless type:ignore * Add tests * Fix tests & validation feedback for accessory classes in custom nodes * Refactor RAGeneratorType out * Fix broken 
import in conftest * Improve source error handling * Remove unused import in test_eval.py breaking tests * Fix changed error message in tests matches too * Normalize generate_openapi_specs.py and generate_json_schema.py in the actions * Fix path to generate_openapi_specs.py in autoformat.yml * Update Documentation & Code Style * Add test for FAISSDocumentStore-like situations (superclass with init params) * Update Documentation & Code Style * Fix indentation * Remove commented set_config * Store model_name_or_path in FARMReader to use in DistillationDataSilo * Rename _component_configuration into _component_config * Update Documentation & Code Style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
210 lines
8.2 KiB
Python
210 lines
8.2 KiB
Python
from typing import Union
from types import ModuleType

try:
    from importlib import metadata
except ImportError:
    # Python <= 3.7
    # ModuleNotFoundError is a subclass of ImportError, so catching ImportError
    # alone also covers the "module is missing" case.
    import importlib_metadata as metadata  # type: ignore

# Version of the installed "farm-haystack" distribution, read from its package metadata.
__version__: str = str(metadata.version("farm-haystack"))


# This configuration must be done before any import to apply to all submodules
import logging

logging.basicConfig(
    format="%(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.WARNING
)
# The root logger stays at WARNING; only the "haystack" logger is lowered to INFO.
logging.getLogger("haystack").setLevel(logging.INFO)

from haystack import pipelines
from haystack.schema import Document, Answer, Label, MultiLabel, Span
from haystack.nodes import BaseComponent
from haystack.pipelines import Pipeline

import pandas as pd

# Cap the column width pandas uses when displaying dataframes.
pd.options.display.max_colwidth = 80


# ###########################################
# Enable old style imports (temporary)
import sys

logger = logging.getLogger(__name__)
|
|
|
|
# Wrapper emitting a warning on import
|
|
def DeprecatedModule(mod, deprecated_attributes=None, is_module_deprecated=True):
    """
    Return a wrapped object that warns about deprecated accesses at import

    :param mod: The object (normally a module) that attribute lookups are forwarded to.
    :param deprecated_attributes: Optional collection of attribute names that trigger the
                                  warning even when `is_module_deprecated` is False.
    :param is_module_deprecated: If True, every attribute except `__path__`, `__spec__`
                                 and `__name__` triggers the warning.
    :return: A wrapper instance whose attribute lookups are delegated to `mod`.
    """

    class DeprecationWrapper:
        # Attribute names already warned about. The class is re-created on every
        # DeprecatedModule() call, so this list is not shared between wrappers.
        warned = []

        def __getattr__(self, attr):
            # Resolve the attribute exactly once. If `mod` does not have it, the
            # AttributeError propagates to the caller, like a normal lookup would.
            value = getattr(mod, attr)

            is_a_deprecated_attr = deprecated_attributes and attr in deprecated_attributes
            is_a_deprecated_module = is_module_deprecated and attr not in ["__path__", "__spec__", "__name__"]
            warning_already_emitted = attr in self.warned
            attribute_exists = value is not None

            if (is_a_deprecated_attr or is_a_deprecated_module) and not warning_already_emitted and attribute_exists:
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning(
                    f"Object '{attr}' is imported through a deprecated path. Please check out the docs for the new import path."
                )
                self.warned.append(attr)
            return value

    return DeprecationWrapper()
|
|
|
|
|
|
# All modules to be aliased need to be imported here
|
|
|
|
# This self-import is used to monkey-patch, keep for now
|
|
import haystack # pylint: disable=import-self
|
|
from haystack.nodes import (
|
|
connector,
|
|
document_classifier,
|
|
extractor,
|
|
file_converter,
|
|
answer_generator as generator,
|
|
preprocessor,
|
|
question_generator,
|
|
ranker,
|
|
reader,
|
|
retriever,
|
|
summarizer,
|
|
translator,
|
|
)
|
|
|
|
# Note that we ignore the ImportError here because if the user did not install
|
|
# the correct dependency group for a document store, we don't need to setup
|
|
# import warnings for that, so the import here is useless and should fail silently.
|
|
|
|
document_stores: Union[ModuleType, None] = None
|
|
try:
|
|
from haystack import document_stores
|
|
except ImportError:
|
|
pass
|
|
|
|
graph_retriever: Union[ModuleType, None] = None
|
|
try:
|
|
from haystack.nodes.retriever import text2sparql as graph_retriever
|
|
except ImportError:
|
|
pass
|
|
|
|
knowledge_graph: Union[ModuleType, None] = None
|
|
try:
|
|
from haystack.document_stores import graphdb as knowledge_graph
|
|
except ImportError:
|
|
pass
|
|
|
|
from haystack.modeling.evaluation import eval
|
|
from haystack.modeling.logger import MLFlowLogger, StdoutLogger, TensorBoardLogger
|
|
from haystack.nodes.other import JoinDocuments, Docs2Answers, JoinAnswers, RouteDocuments
|
|
from haystack.nodes.query_classifier import SklearnQueryClassifier, TransformersQueryClassifier
|
|
from haystack.nodes.file_classifier import FileTypeClassifier
|
|
from haystack.utils import preprocessing
|
|
import haystack.modeling.utils as modeling_utils
|
|
from haystack.utils import cleaning
|
|
|
|
# For the alias to work as an importable module (like `from haystack import reader`),
# modules need to be set as attributes of their parent module.
# To make chain imports work (`from haystack.reader import FARMReader`) the module
# needs to be also present in sys.modules with its complete import path.
def _register_deprecated_alias(parent, import_path, module, deprecated_attributes=None):
    """
    Expose `module` under the legacy dotted path `import_path`.

    The same wrapper object is stored both as an attribute of `parent` (named after the
    last component of `import_path`) and in `sys.modules`, so the two access paths
    share one object and one set of already-emitted warnings.

    :param parent: Object that receives the alias as an attribute.
    :param import_path: Full legacy import path, e.g. "haystack.reader".
    :param module: The module to expose. If None (its optional import failed), nothing is registered.
    :param deprecated_attributes: Forwarded to `DeprecatedModule`.
    """
    if module is None:
        return
    wrapper = DeprecatedModule(module, deprecated_attributes=deprecated_attributes)
    setattr(parent, import_path.rpartition(".")[2], wrapper)
    sys.modules[import_path] = wrapper


# To be imported from modules, classes need only to be set as attributes,
# they don't need to be present in sys.modules too.
# Adding them to sys.modules would enable `import haystack.pipelines.JoinDocuments`,
# which I believe is a very rare import style.
#
# NOTE: `haystack` is this very module, so registering e.g. `haystack.file_converter`
# below rebinds the global name `file_converter` to its wrapper. Everything that must
# reach the real modules therefore has to run before the aliases are registered.
setattr(file_converter, "FileTypeClassifier", FileTypeClassifier)
setattr(modeling_utils, "MLFlowLogger", MLFlowLogger)
setattr(modeling_utils, "StdoutLogger", StdoutLogger)
setattr(modeling_utils, "TensorBoardLogger", TensorBoardLogger)
setattr(pipelines, "JoinDocuments", JoinDocuments)
setattr(pipelines, "Docs2Answers", Docs2Answers)
setattr(pipelines, "SklearnQueryClassifier", SklearnQueryClassifier)
setattr(pipelines, "TransformersQueryClassifier", TransformersQueryClassifier)

# Nested aliases, attached to the real modules before their parents get wrapped.
_register_deprecated_alias(knowledge_graph, "haystack.knowledge_graph.graphdb", knowledge_graph)
_register_deprecated_alias(preprocessor, "haystack.preprocessor.utils", preprocessing)
_register_deprecated_alias(preprocessor, "haystack.preprocessor.cleaning", cleaning)

# Top-level aliases. Modules whose optional import failed are None and are skipped.
_register_deprecated_alias(haystack, "haystack.document_store", document_stores)
_register_deprecated_alias(haystack, "haystack.connector", connector)
_register_deprecated_alias(haystack, "haystack.generator", generator)
_register_deprecated_alias(haystack, "haystack.document_classifier", document_classifier)
_register_deprecated_alias(haystack, "haystack.extractor", extractor)
_register_deprecated_alias(haystack, "haystack.eval", eval)
_register_deprecated_alias(
    haystack, "haystack.file_converter", file_converter, deprecated_attributes=["FileTypeClassifier"]
)
_register_deprecated_alias(haystack, "haystack.knowledge_graph", knowledge_graph, deprecated_attributes=["graphdb"])
_register_deprecated_alias(
    haystack,
    "haystack.pipeline",
    pipelines,
    deprecated_attributes=[
        "JoinDocuments",
        "Docs2Answers",
        "SklearnQueryClassifier",
        "TransformersQueryClassifier",
    ],
)
_register_deprecated_alias(haystack, "haystack.preprocessor", preprocessor, deprecated_attributes=["utils", "cleaning"])
_register_deprecated_alias(haystack, "haystack.question_generator", question_generator)
_register_deprecated_alias(haystack, "haystack.ranker", ranker)
_register_deprecated_alias(haystack, "haystack.reader", reader)
_register_deprecated_alias(haystack, "haystack.retriever", retriever)
_register_deprecated_alias(haystack, "haystack.summarizer", summarizer)
_register_deprecated_alias(haystack, "haystack.translator", translator)
_register_deprecated_alias(haystack, "haystack.graph_retriever", graph_retriever)
|
|
|
|
# Finally, swap the `haystack` entry in sys.modules for a wrapper around this package,
# so that imports like `from haystack import connector` emit the deprecation warning.
# Only the legacy names listed here are flagged (is_module_deprecated=False).
deprecated_attributes = [
    "document_store",
    "connector",
    "generator",
    "document_classifier",
    "extractor",
    "eval",
    "file_converter",
    "knowledge_graph",
    "pipeline",
    "preprocessor",
    "question_generator",
    "ranker",
    "reader",
    "retriever",
    "summarizer",
    "translator",
] + (["graph_retriever"] if graph_retriever else [])

sys.modules["haystack"] = DeprecatedModule(
    haystack, deprecated_attributes=deprecated_attributes, is_module_deprecated=False
)
|