mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-27 15:08:43 +00:00
Forbid usage of *args and **kwargs in any node's __init__ (#2362)
* Add failing test * Remove `**kwargs` from docstores' `__init__` functions (#2407) * Remove kwargs from ESDocStore subclasses * Remove kwargs from subclasses of SQLDocumentStore * Remove kwargs from Weaviate * Revert change in pinecone * Fix tests * Fix retriever test with weaviate * Change Exception into DocumentStoreError * Update Documentation & Code Style * Remove `**kwargs` from `FARMReader` (#2413) * Remove FARMReader kwargs without trying to replace them functionally * Update Documentation & Code Style * enforce same index values before and after saving/loading eval dataframes (#2398) * Add tests for missing `__init__` and `super().__init__()` in custom nodes (#2350) * Add tests for missing init and super * Update Documentation & Code Style * change in with endswith * Move test in pipeline.py and change test in pipeline_yaml.py * Update Documentation & Code Style * Use caplog to test the warning * Update Documentation & Code Style * move tests into test_pipeline and use get_config * Update Documentation & Code Style * Unmock version name * Improve variadic args test * Update Documentation & Code Style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
46a50fb979
commit
929c685cda
@ -414,7 +414,7 @@ class ElasticsearchDocumentStore(KeywordDocumentStore)
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity="dot_product", timeout=30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
|
||||
def __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity: str = "dot_product", timeout: int = 30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
|
||||
```
|
||||
|
||||
A DocumentStore using Elasticsearch to store and query the documents for our search.
|
||||
@ -1231,7 +1231,7 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore)
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(verify_certs=False, scheme="https", username="admin", password="admin", port=9200, **kwargs)
|
||||
def __init__(scheme: str = "https", username: str = "admin", password: str = "admin", host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", ca_certs: Optional[str] = None, verify_certs: bool = False, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity: str = "dot_product", timeout: int = 30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
|
||||
```
|
||||
|
||||
Document Store using OpenSearch (https://opensearch.org/). It is compatible with the AWS Elasticsearch Service.
|
||||
@ -2235,7 +2235,7 @@ the vector embeddings are indexed in a FAISS Index.
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, **kwargs, ,)
|
||||
def __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, n_links: int = 64, ef_search: int = 20, ef_construction: int = 80)
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
@ -2282,6 +2282,9 @@ If specified no other params besides faiss_config_path must be specified.
|
||||
- `faiss_config_path`: Stored FAISS initial configuration parameters.
|
||||
Can be created via calling `save()`
|
||||
- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
|
||||
- `n_links`: used only if index_factory == "HNSW"
|
||||
- `ef_search`: used only if index_factory == "HNSW"
|
||||
- `ef_construction`: used only if index_factory == "HNSW"
|
||||
|
||||
<a id="faiss.FAISSDocumentStore.write_documents"></a>
|
||||
|
||||
@ -2545,7 +2548,7 @@ Usage:
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None, **kwargs, ,)
|
||||
def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None)
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
@ -3168,7 +3171,7 @@ The current implementation is not supporting the storage of labels, so you canno
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int, List[int]] = 8080, timeout_config: tuple = (5, 15), username: str = None, password: str = None, index: str = "Document", embedding_dim: int = 768, content_field: str = "content", name_field: str = "name", similarity: str = "cosine", index_type: str = "hnsw", custom_schema: Optional[dict] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", **kwargs, ,)
|
||||
def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int, List[int]] = 8080, timeout_config: tuple = (5, 15), username: str = None, password: str = None, index: str = "Document", embedding_dim: int = 768, content_field: str = "content", name_field: str = "name", similarity: str = "cosine", index_type: str = "hnsw", custom_schema: Optional[dict] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite")
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
@ -55,7 +55,7 @@ While the underlying model can vary (BERT, Roberta, DistilBERT, ...), the interf
|
||||
#### \_\_init\_\_
|
||||
|
||||
```python
|
||||
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None, **kwargs, ,)
|
||||
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None)
|
||||
```
|
||||
|
||||
**Arguments**:
|
||||
|
||||
@ -23,6 +23,7 @@ from haystack.document_stores import KeywordDocumentStore
|
||||
from haystack.schema import Document, Label
|
||||
from haystack.document_stores.base import get_batches_from_generator
|
||||
from haystack.document_stores.filter_utils import LogicalFilterClause
|
||||
from haystack.errors import DocumentStoreError
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -54,8 +55,8 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
|
||||
recreate_index: bool = False,
|
||||
create_index: bool = True,
|
||||
refresh_type: str = "wait_for",
|
||||
similarity="dot_product",
|
||||
timeout=30,
|
||||
similarity: str = "dot_product",
|
||||
timeout: int = 30,
|
||||
return_embedding: bool = False,
|
||||
duplicate_documents: str = "overwrite",
|
||||
index_type: str = "flat",
|
||||
@ -179,9 +180,9 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
|
||||
self.scroll = scroll
|
||||
self.skip_missing_embeddings: bool = skip_missing_embeddings
|
||||
if similarity in ["cosine", "dot_product", "l2"]:
|
||||
self.similarity = similarity
|
||||
self.similarity: str = similarity
|
||||
else:
|
||||
raise Exception(
|
||||
raise DocumentStoreError(
|
||||
f"Invalid value {similarity} for similarity in ElasticSearchDocumentStore constructor. Choose between 'cosine', 'l2' and 'dot_product'"
|
||||
)
|
||||
if index_type in ["flat", "hnsw"]:
|
||||
@ -1592,7 +1593,42 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
|
||||
|
||||
|
||||
class OpenSearchDocumentStore(ElasticsearchDocumentStore):
|
||||
def __init__(self, verify_certs=False, scheme="https", username="admin", password="admin", port=9200, **kwargs):
|
||||
def __init__(
|
||||
self,
|
||||
scheme: str = "https", # Mind this different default param
|
||||
username: str = "admin", # Mind this different default param
|
||||
password: str = "admin", # Mind this different default param
|
||||
host: Union[str, List[str]] = "localhost",
|
||||
port: Union[int, List[int]] = 9200,
|
||||
api_key_id: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
aws4auth=None,
|
||||
index: str = "document",
|
||||
label_index: str = "label",
|
||||
search_fields: Union[str, list] = "content",
|
||||
content_field: str = "content",
|
||||
name_field: str = "name",
|
||||
embedding_field: str = "embedding",
|
||||
embedding_dim: int = 768,
|
||||
custom_mapping: Optional[dict] = None,
|
||||
excluded_meta_data: Optional[list] = None,
|
||||
analyzer: str = "standard",
|
||||
ca_certs: Optional[str] = None,
|
||||
verify_certs: bool = False, # Mind this different default param
|
||||
recreate_index: bool = False,
|
||||
create_index: bool = True,
|
||||
refresh_type: str = "wait_for",
|
||||
similarity: str = "dot_product",
|
||||
timeout: int = 30,
|
||||
return_embedding: bool = False,
|
||||
duplicate_documents: str = "overwrite",
|
||||
index_type: str = "flat",
|
||||
scroll: str = "1d",
|
||||
skip_missing_embeddings: bool = True,
|
||||
synonyms: Optional[List] = None,
|
||||
synonym_type: str = "synonym",
|
||||
use_system_proxy: bool = False,
|
||||
):
|
||||
"""
|
||||
Document Store using OpenSearch (https://opensearch.org/). It is compatible with the AWS Elasticsearch Service.
|
||||
|
||||
@ -1662,14 +1698,44 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
|
||||
Synonym or Synonym_graph to handle synonyms, including multi-word synonyms correctly during the analysis process.
|
||||
More info at https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-graph-tokenfilter.html
|
||||
"""
|
||||
super().__init__(
|
||||
scheme=scheme,
|
||||
username=username,
|
||||
password=password,
|
||||
host=host,
|
||||
port=port,
|
||||
api_key_id=api_key_id,
|
||||
api_key=api_key,
|
||||
aws4auth=aws4auth,
|
||||
index=index,
|
||||
label_index=label_index,
|
||||
search_fields=search_fields,
|
||||
content_field=content_field,
|
||||
name_field=name_field,
|
||||
embedding_field=embedding_field,
|
||||
embedding_dim=embedding_dim,
|
||||
custom_mapping=custom_mapping,
|
||||
excluded_meta_data=excluded_meta_data,
|
||||
analyzer=analyzer,
|
||||
ca_certs=ca_certs,
|
||||
verify_certs=verify_certs,
|
||||
recreate_index=recreate_index,
|
||||
create_index=create_index,
|
||||
refresh_type=refresh_type,
|
||||
similarity=similarity,
|
||||
timeout=timeout,
|
||||
return_embedding=return_embedding,
|
||||
duplicate_documents=duplicate_documents,
|
||||
index_type=index_type,
|
||||
scroll=scroll,
|
||||
skip_missing_embeddings=skip_missing_embeddings,
|
||||
synonyms=synonyms,
|
||||
synonym_type=synonym_type,
|
||||
use_system_proxy=use_system_proxy,
|
||||
)
|
||||
self.embeddings_field_supports_similarity = False
|
||||
self.similarity_to_space_type = {"cosine": "cosinesimil", "dot_product": "innerproduct", "l2": "l2"}
|
||||
self.space_type_to_similarity = {v: k for k, v in self.similarity_to_space_type.items()}
|
||||
# Overwrite default kwarg values of parent class so that in default cases we can initialize
|
||||
# an OpenSearchDocumentStore without providing any arguments
|
||||
super(OpenSearchDocumentStore, self).__init__(
|
||||
verify_certs=verify_certs, scheme=scheme, username=username, password=password, port=port, **kwargs
|
||||
)
|
||||
|
||||
def query_by_embedding(
|
||||
self,
|
||||
@ -1914,7 +1980,7 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
|
||||
if not self.client.indices.exists(index=index_name, headers=headers):
|
||||
raise e
|
||||
|
||||
def _get_embedding_field_mapping(self, similarity: Optional[str]):
|
||||
def _get_embedding_field_mapping(self, similarity: str):
|
||||
space_type = self.similarity_to_space_type[similarity]
|
||||
method: dict = {"space_type": space_type, "name": "hnsw", "engine": "nmslib"}
|
||||
|
||||
@ -2049,10 +2115,79 @@ class OpenDistroElasticsearchDocumentStore(OpenSearchDocumentStore):
|
||||
A DocumentStore which has an Open Distro for Elasticsearch service behind it.
|
||||
"""
|
||||
|
||||
def __init__(self, similarity="cosine", **kwargs):
|
||||
def __init__(
|
||||
self,
|
||||
scheme: str = "https",
|
||||
username: str = "admin",
|
||||
password: str = "admin",
|
||||
host: Union[str, List[str]] = "localhost",
|
||||
port: Union[int, List[int]] = 9200,
|
||||
api_key_id: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
aws4auth=None,
|
||||
index: str = "document",
|
||||
label_index: str = "label",
|
||||
search_fields: Union[str, list] = "content",
|
||||
content_field: str = "content",
|
||||
name_field: str = "name",
|
||||
embedding_field: str = "embedding",
|
||||
embedding_dim: int = 768,
|
||||
custom_mapping: Optional[dict] = None,
|
||||
excluded_meta_data: Optional[list] = None,
|
||||
analyzer: str = "standard",
|
||||
ca_certs: Optional[str] = None,
|
||||
verify_certs: bool = False,
|
||||
recreate_index: bool = False,
|
||||
create_index: bool = True,
|
||||
refresh_type: str = "wait_for",
|
||||
similarity: str = "cosine", # Mind this different default param
|
||||
timeout: int = 30,
|
||||
return_embedding: bool = False,
|
||||
duplicate_documents: str = "overwrite",
|
||||
index_type: str = "flat",
|
||||
scroll: str = "1d",
|
||||
skip_missing_embeddings: bool = True,
|
||||
synonyms: Optional[List] = None,
|
||||
synonym_type: str = "synonym",
|
||||
use_system_proxy: bool = False,
|
||||
):
|
||||
logger.warning(
|
||||
"Open Distro for Elasticsearch has been replaced by OpenSearch! "
|
||||
"See https://opensearch.org/faq/ for details. "
|
||||
"We recommend using the OpenSearchDocumentStore instead."
|
||||
)
|
||||
super(OpenDistroElasticsearchDocumentStore, self).__init__(similarity=similarity, **kwargs)
|
||||
super().__init__(
|
||||
scheme=scheme,
|
||||
username=username,
|
||||
password=password,
|
||||
host=host,
|
||||
port=port,
|
||||
api_key_id=api_key_id,
|
||||
api_key=api_key,
|
||||
aws4auth=aws4auth,
|
||||
index=index,
|
||||
label_index=label_index,
|
||||
search_fields=search_fields,
|
||||
content_field=content_field,
|
||||
name_field=name_field,
|
||||
embedding_field=embedding_field,
|
||||
embedding_dim=embedding_dim,
|
||||
custom_mapping=custom_mapping,
|
||||
excluded_meta_data=excluded_meta_data,
|
||||
analyzer=analyzer,
|
||||
ca_certs=ca_certs,
|
||||
verify_certs=verify_certs,
|
||||
recreate_index=recreate_index,
|
||||
create_index=create_index,
|
||||
refresh_type=refresh_type,
|
||||
similarity=similarity,
|
||||
timeout=timeout,
|
||||
return_embedding=return_embedding,
|
||||
duplicate_documents=duplicate_documents,
|
||||
index_type=index_type,
|
||||
scroll=scroll,
|
||||
skip_missing_embeddings=skip_missing_embeddings,
|
||||
synonyms=synonyms,
|
||||
synonym_type=synonym_type,
|
||||
use_system_proxy=use_system_proxy,
|
||||
)
|
||||
|
||||
@ -57,7 +57,9 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
faiss_index_path: Union[str, Path] = None,
|
||||
faiss_config_path: Union[str, Path] = None,
|
||||
isolation_level: str = None,
|
||||
**kwargs,
|
||||
n_links: int = 64,
|
||||
ef_search: int = 20,
|
||||
ef_construction: int = 80,
|
||||
):
|
||||
"""
|
||||
:param sql_url: SQL connection URL for database. It defaults to local file based SQLite DB. For large scale
|
||||
@ -102,12 +104,15 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
:param faiss_config_path: Stored FAISS initial configuration parameters.
|
||||
Can be created via calling `save()`
|
||||
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
|
||||
:param n_links: used only if index_factory == "HNSW"
|
||||
:param ef_search: used only if index_factory == "HNSW"
|
||||
:param ef_construction: used only if index_factory == "HNSW"
|
||||
"""
|
||||
# special case if we want to load an existing index from disk
|
||||
# load init params from disk and run init again
|
||||
if faiss_index_path is not None:
|
||||
sig = signature(self.__class__.__init__)
|
||||
self._validate_params_load_from_disk(sig, locals(), kwargs)
|
||||
self._validate_params_load_from_disk(sig, locals())
|
||||
init_params = self._load_init_params_from_config(faiss_index_path, faiss_config_path)
|
||||
self.__class__.__init__(self, **init_params) # pylint: disable=non-parent-init-called
|
||||
return
|
||||
@ -141,7 +146,9 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
embedding_dim=self.embedding_dim,
|
||||
index_factory=faiss_index_factory_str,
|
||||
metric_type=self.metric_type,
|
||||
**kwargs,
|
||||
n_links=n_links,
|
||||
ef_search=ef_search,
|
||||
ef_construction=ef_construction,
|
||||
)
|
||||
|
||||
self.return_embedding = return_embedding
|
||||
@ -155,8 +162,8 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
|
||||
self._validate_index_sync()
|
||||
|
||||
def _validate_params_load_from_disk(self, sig: Signature, locals: dict, kwargs: dict):
|
||||
allowed_params = ["faiss_index_path", "faiss_config_path", "self", "kwargs"]
|
||||
def _validate_params_load_from_disk(self, sig: Signature, locals: dict):
|
||||
allowed_params = ["faiss_index_path", "faiss_config_path", "self"]
|
||||
invalid_param_set = False
|
||||
|
||||
for param in sig.parameters.values():
|
||||
@ -164,7 +171,7 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
invalid_param_set = True
|
||||
break
|
||||
|
||||
if invalid_param_set or len(kwargs) > 0:
|
||||
if invalid_param_set:
|
||||
raise ValueError("if faiss_index_path is passed no other params besides faiss_config_path are allowed.")
|
||||
|
||||
def _validate_index_sync(self):
|
||||
@ -179,14 +186,21 @@ class FAISSDocumentStore(SQLDocumentStore):
|
||||
"was used when creating the original index."
|
||||
)
|
||||
|
||||
def _create_new_index(self, embedding_dim: int, metric_type, index_factory: str = "Flat", **kwargs):
|
||||
def _create_new_index(
|
||||
self,
|
||||
embedding_dim: int,
|
||||
metric_type,
|
||||
index_factory: str = "Flat",
|
||||
n_links: int = 64,
|
||||
ef_search: int = 20,
|
||||
ef_construction: int = 80,
|
||||
):
|
||||
if index_factory == "HNSW":
|
||||
# faiss index factory doesn't give the same results for HNSW IP, therefore direct init.
|
||||
# defaults here are similar to DPR codebase (good accuracy, but very high RAM consumption)
|
||||
n_links = kwargs.get("n_links", 64)
|
||||
index = faiss.IndexHNSWFlat(embedding_dim, n_links, metric_type)
|
||||
index.hnsw.efSearch = kwargs.get("efSearch", 20) # 20
|
||||
index.hnsw.efConstruction = kwargs.get("efConstruction", 80) # 80
|
||||
index.hnsw.efSearch = ef_search
|
||||
index.hnsw.efConstruction = ef_construction
|
||||
if "ivf" in index_factory.lower(): # enable reconstruction of vectors for inverted index
|
||||
self.faiss_indexes[index].set_direct_map_type(faiss.DirectMap.Hashtable)
|
||||
|
||||
|
||||
@ -60,7 +60,6 @@ class Milvus1DocumentStore(SQLDocumentStore):
|
||||
progress_bar: bool = True,
|
||||
duplicate_documents: str = "overwrite",
|
||||
isolation_level: str = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
:param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale
|
||||
@ -106,7 +105,9 @@ class Milvus1DocumentStore(SQLDocumentStore):
|
||||
exists.
|
||||
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
|
||||
"""
|
||||
super().__init__()
|
||||
super().__init__(
|
||||
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
|
||||
)
|
||||
|
||||
self.milvus_server = Milvus(uri=milvus_url, pool=connection_pool)
|
||||
|
||||
@ -141,10 +142,6 @@ class Milvus1DocumentStore(SQLDocumentStore):
|
||||
self.embedding_field = embedding_field
|
||||
self.progress_bar = progress_bar
|
||||
|
||||
super().__init__(
|
||||
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
return self.milvus_server.close()
|
||||
|
||||
|
||||
@ -126,7 +126,9 @@ class Milvus2DocumentStore(SQLDocumentStore):
|
||||
exists.
|
||||
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
|
||||
"""
|
||||
super().__init__()
|
||||
super().__init__(
|
||||
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
|
||||
)
|
||||
|
||||
connections.add_connection(default={"host": host, "port": port})
|
||||
connections.connect()
|
||||
@ -171,10 +173,6 @@ class Milvus2DocumentStore(SQLDocumentStore):
|
||||
self.return_embedding = return_embedding
|
||||
self.progress_bar = progress_bar
|
||||
|
||||
super().__init__(
|
||||
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
|
||||
)
|
||||
|
||||
def _create_collection_and_index_if_not_exist(
|
||||
self, index: Optional[str] = None, consistency_level: int = 0, index_param: Optional[Dict[str, Any]] = None
|
||||
):
|
||||
|
||||
@ -81,7 +81,6 @@ class PineconeDocumentStore(SQLDocumentStore):
|
||||
- `"overwrite"`: Update any existing documents with the same ID when adding documents.
|
||||
- `"fail"`: An error is raised if the document ID of the document being added already exists.
|
||||
"""
|
||||
|
||||
# Connect to Pinecone server using python client binding
|
||||
pinecone.init(api_key=api_key, environment=environment)
|
||||
self._api_key = api_key
|
||||
@ -129,8 +128,6 @@ class PineconeDocumentStore(SQLDocumentStore):
|
||||
|
||||
super().__init__(url=sql_url, index=clean_index, duplicate_documents=duplicate_documents)
|
||||
|
||||
# self._validate_index_sync()
|
||||
|
||||
def _sanitize_index_name(self, index: str) -> str:
|
||||
return index.replace("_", "-").lower()
|
||||
|
||||
|
||||
@ -70,7 +70,6 @@ class WeaviateDocumentStore(BaseDocumentStore):
|
||||
embedding_field: str = "embedding",
|
||||
progress_bar: bool = True,
|
||||
duplicate_documents: str = "overwrite",
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
:param host: Weaviate server connection URL for storing and processing documents and vectors.
|
||||
|
||||
@ -15,9 +15,6 @@
|
||||
},
|
||||
{
|
||||
"const": "1.3.0"
|
||||
},
|
||||
{
|
||||
"const": "1.3.1rc0"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
4191
haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json
Normal file
4191
haystack/json-schemas/haystack-pipeline-1.3.1rc0.schema.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -13,12 +13,6 @@
|
||||
{
|
||||
"const": "unstable"
|
||||
},
|
||||
{
|
||||
"const": "1.2.1rc0"
|
||||
},
|
||||
{
|
||||
"const": "1.3.0"
|
||||
},
|
||||
{
|
||||
"const": "1.3.1rc0"
|
||||
}
|
||||
@ -470,11 +464,13 @@
|
||||
},
|
||||
"similarity": {
|
||||
"title": "Similarity",
|
||||
"default": "dot_product"
|
||||
"default": "dot_product",
|
||||
"type": "string"
|
||||
},
|
||||
"timeout": {
|
||||
"title": "Timeout",
|
||||
"default": 30
|
||||
"default": 30,
|
||||
"type": "integer"
|
||||
},
|
||||
"return_embedding": {
|
||||
"title": "Return Embedding",
|
||||
@ -626,6 +622,21 @@
|
||||
"isolation_level": {
|
||||
"title": "Isolation Level",
|
||||
"type": "string"
|
||||
},
|
||||
"n_links": {
|
||||
"title": "N Links",
|
||||
"default": 64,
|
||||
"type": "integer"
|
||||
},
|
||||
"ef_search": {
|
||||
"title": "Ef Search",
|
||||
"default": 20,
|
||||
"type": "integer"
|
||||
},
|
||||
"ef_construction": {
|
||||
"title": "Ef Construction",
|
||||
"default": 80,
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
@ -918,9 +929,192 @@
|
||||
"title": "Parameters",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scheme": {
|
||||
"title": "Scheme",
|
||||
"default": "https",
|
||||
"type": "string"
|
||||
},
|
||||
"username": {
|
||||
"title": "Username",
|
||||
"default": "admin",
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"title": "Password",
|
||||
"default": "admin",
|
||||
"type": "string"
|
||||
},
|
||||
"host": {
|
||||
"title": "Host",
|
||||
"default": "localhost",
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"port": {
|
||||
"title": "Port",
|
||||
"default": 9200,
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"api_key_id": {
|
||||
"title": "Api Key Id",
|
||||
"type": "string"
|
||||
},
|
||||
"api_key": {
|
||||
"title": "Api Key",
|
||||
"type": "string"
|
||||
},
|
||||
"aws4auth": {
|
||||
"title": "Aws4Auth"
|
||||
},
|
||||
"index": {
|
||||
"title": "Index",
|
||||
"default": "document",
|
||||
"type": "string"
|
||||
},
|
||||
"label_index": {
|
||||
"title": "Label Index",
|
||||
"default": "label",
|
||||
"type": "string"
|
||||
},
|
||||
"search_fields": {
|
||||
"title": "Search Fields",
|
||||
"default": "content",
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
"content_field": {
|
||||
"title": "Content Field",
|
||||
"default": "content",
|
||||
"type": "string"
|
||||
},
|
||||
"name_field": {
|
||||
"title": "Name Field",
|
||||
"default": "name",
|
||||
"type": "string"
|
||||
},
|
||||
"embedding_field": {
|
||||
"title": "Embedding Field",
|
||||
"default": "embedding",
|
||||
"type": "string"
|
||||
},
|
||||
"embedding_dim": {
|
||||
"title": "Embedding Dim",
|
||||
"default": 768,
|
||||
"type": "integer"
|
||||
},
|
||||
"custom_mapping": {
|
||||
"title": "Custom Mapping",
|
||||
"type": "object"
|
||||
},
|
||||
"excluded_meta_data": {
|
||||
"title": "Excluded Meta Data",
|
||||
"type": "array",
|
||||
"items": {}
|
||||
},
|
||||
"analyzer": {
|
||||
"title": "Analyzer",
|
||||
"default": "standard",
|
||||
"type": "string"
|
||||
},
|
||||
"ca_certs": {
|
||||
"title": "Ca Certs",
|
||||
"type": "string"
|
||||
},
|
||||
"verify_certs": {
|
||||
"title": "Verify Certs",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"recreate_index": {
|
||||
"title": "Recreate Index",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"create_index": {
|
||||
"title": "Create Index",
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
},
|
||||
"refresh_type": {
|
||||
"title": "Refresh Type",
|
||||
"default": "wait_for",
|
||||
"type": "string"
|
||||
},
|
||||
"similarity": {
|
||||
"title": "Similarity",
|
||||
"default": "cosine"
|
||||
"default": "cosine",
|
||||
"type": "string"
|
||||
},
|
||||
"timeout": {
|
||||
"title": "Timeout",
|
||||
"default": 30,
|
||||
"type": "integer"
|
||||
},
|
||||
"return_embedding": {
|
||||
"title": "Return Embedding",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"duplicate_documents": {
|
||||
"title": "Duplicate Documents",
|
||||
"default": "overwrite",
|
||||
"type": "string"
|
||||
},
|
||||
"index_type": {
|
||||
"title": "Index Type",
|
||||
"default": "flat",
|
||||
"type": "string"
|
||||
},
|
||||
"scroll": {
|
||||
"title": "Scroll",
|
||||
"default": "1d",
|
||||
"type": "string"
|
||||
},
|
||||
"skip_missing_embeddings": {
|
||||
"title": "Skip Missing Embeddings",
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
},
|
||||
"synonyms": {
|
||||
"title": "Synonyms",
|
||||
"type": "array",
|
||||
"items": {}
|
||||
},
|
||||
"synonym_type": {
|
||||
"title": "Synonym Type",
|
||||
"default": "synonym",
|
||||
"type": "string"
|
||||
},
|
||||
"use_system_proxy": {
|
||||
"title": "Use System Proxy",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
@ -951,25 +1145,192 @@
|
||||
"title": "Parameters",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"verify_certs": {
|
||||
"title": "Verify Certs",
|
||||
"default": false
|
||||
},
|
||||
"scheme": {
|
||||
"title": "Scheme",
|
||||
"default": "https"
|
||||
"default": "https",
|
||||
"type": "string"
|
||||
},
|
||||
"username": {
|
||||
"title": "Username",
|
||||
"default": "admin"
|
||||
"default": "admin",
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"title": "Password",
|
||||
"default": "admin"
|
||||
"default": "admin",
|
||||
"type": "string"
|
||||
},
|
||||
"host": {
|
||||
"title": "Host",
|
||||
"default": "localhost",
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"port": {
|
||||
"title": "Port",
|
||||
"default": 9200
|
||||
"default": 9200,
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"api_key_id": {
|
||||
"title": "Api Key Id",
|
||||
"type": "string"
|
||||
},
|
||||
"api_key": {
|
||||
"title": "Api Key",
|
||||
"type": "string"
|
||||
},
|
||||
"aws4auth": {
|
||||
"title": "Aws4Auth"
|
||||
},
|
||||
"index": {
|
||||
"title": "Index",
|
||||
"default": "document",
|
||||
"type": "string"
|
||||
},
|
||||
"label_index": {
|
||||
"title": "Label Index",
|
||||
"default": "label",
|
||||
"type": "string"
|
||||
},
|
||||
"search_fields": {
|
||||
"title": "Search Fields",
|
||||
"default": "content",
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
"content_field": {
|
||||
"title": "Content Field",
|
||||
"default": "content",
|
||||
"type": "string"
|
||||
},
|
||||
"name_field": {
|
||||
"title": "Name Field",
|
||||
"default": "name",
|
||||
"type": "string"
|
||||
},
|
||||
"embedding_field": {
|
||||
"title": "Embedding Field",
|
||||
"default": "embedding",
|
||||
"type": "string"
|
||||
},
|
||||
"embedding_dim": {
|
||||
"title": "Embedding Dim",
|
||||
"default": 768,
|
||||
"type": "integer"
|
||||
},
|
||||
"custom_mapping": {
|
||||
"title": "Custom Mapping",
|
||||
"type": "object"
|
||||
},
|
||||
"excluded_meta_data": {
|
||||
"title": "Excluded Meta Data",
|
||||
"type": "array",
|
||||
"items": {}
|
||||
},
|
||||
"analyzer": {
|
||||
"title": "Analyzer",
|
||||
"default": "standard",
|
||||
"type": "string"
|
||||
},
|
||||
"ca_certs": {
|
||||
"title": "Ca Certs",
|
||||
"type": "string"
|
||||
},
|
||||
"verify_certs": {
|
||||
"title": "Verify Certs",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"recreate_index": {
|
||||
"title": "Recreate Index",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"create_index": {
|
||||
"title": "Create Index",
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
},
|
||||
"refresh_type": {
|
||||
"title": "Refresh Type",
|
||||
"default": "wait_for",
|
||||
"type": "string"
|
||||
},
|
||||
"similarity": {
|
||||
"title": "Similarity",
|
||||
"default": "dot_product",
|
||||
"type": "string"
|
||||
},
|
||||
"timeout": {
|
||||
"title": "Timeout",
|
||||
"default": 30,
|
||||
"type": "integer"
|
||||
},
|
||||
"return_embedding": {
|
||||
"title": "Return Embedding",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
},
|
||||
"duplicate_documents": {
|
||||
"title": "Duplicate Documents",
|
||||
"default": "overwrite",
|
||||
"type": "string"
|
||||
},
|
||||
"index_type": {
|
||||
"title": "Index Type",
|
||||
"default": "flat",
|
||||
"type": "string"
|
||||
},
|
||||
"scroll": {
|
||||
"title": "Scroll",
|
||||
"default": "1d",
|
||||
"type": "string"
|
||||
},
|
||||
"skip_missing_embeddings": {
|
||||
"title": "Skip Missing Embeddings",
|
||||
"default": true,
|
||||
"type": "boolean"
|
||||
},
|
||||
"synonyms": {
|
||||
"title": "Synonyms",
|
||||
"type": "array",
|
||||
"items": {}
|
||||
},
|
||||
"synonym_type": {
|
||||
"title": "Synonym Type",
|
||||
"default": "synonym",
|
||||
"type": "string"
|
||||
},
|
||||
"use_system_proxy": {
|
||||
"title": "Use System Proxy",
|
||||
"default": false,
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
||||
@ -58,9 +58,6 @@
|
||||
},
|
||||
{
|
||||
"const": "1.3.0"
|
||||
},
|
||||
{
|
||||
"const": "1.3.1rc0"
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -70,6 +67,24 @@
|
||||
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.1rc0.schema.json"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"allOf": [
|
||||
{
|
||||
"properties": {
|
||||
"version": {
|
||||
"oneOf": [
|
||||
{
|
||||
"const": "1.3.1rc0"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.3.1rc0.schema.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -154,6 +154,13 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
|
||||
raise PipelineSchemaError(f"Could not read the __init__ method of {node_name} to create its schema.")
|
||||
|
||||
signature = get_typed_signature(init_method)
|
||||
|
||||
# Check for variadic parameters (*args or **kwargs) and raise an exception if found
|
||||
if any(param.kind in {param.VAR_POSITIONAL, param.VAR_KEYWORD} for param in signature.parameters.values()):
|
||||
raise PipelineSchemaError(
|
||||
"Nodes cannot use variadic parameters like *args or **kwargs in their __init__ function."
|
||||
)
|
||||
|
||||
param_fields = [
|
||||
param for param in signature.parameters.values() if param.kind not in {param.VAR_POSITIONAL, param.VAR_KEYWORD}
|
||||
]
|
||||
|
||||
@ -62,7 +62,6 @@ class FARMReader(BaseReader):
|
||||
local_files_only=False,
|
||||
force_download=False,
|
||||
use_auth_token: Optional[Union[str, bool]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
"""
|
||||
@ -140,7 +139,6 @@ class FARMReader(BaseReader):
|
||||
force_download=force_download,
|
||||
devices=self.devices,
|
||||
use_auth_token=use_auth_token,
|
||||
**kwargs,
|
||||
)
|
||||
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
|
||||
self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost
|
||||
|
||||
@ -842,9 +842,7 @@ def get_document_store(
|
||||
)
|
||||
|
||||
elif document_store_type == "weaviate":
|
||||
document_store = WeaviateDocumentStore(
|
||||
weaviate_url="http://localhost:8080", index=index, similarity=similarity, embedding_dim=embedding_dim
|
||||
)
|
||||
document_store = WeaviateDocumentStore(index=index, similarity=similarity, embedding_dim=embedding_dim)
|
||||
document_store.weaviate_client.schema.delete_all()
|
||||
document_store._create_schema_and_index_if_not_exist()
|
||||
|
||||
|
||||
@ -667,6 +667,72 @@ def test_load_yaml_custom_component_with_superclass(tmp_path):
|
||||
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
|
||||
|
||||
|
||||
def test_load_yaml_custom_component_with_variadic_args(tmp_path):
|
||||
class BaseCustomNode(MockNode):
|
||||
def __init__(self, base_parameter: int):
|
||||
super().__init__()
|
||||
self.base_parameter = base_parameter
|
||||
|
||||
class CustomNode(BaseCustomNode):
|
||||
def __init__(self, some_parameter: str, *args):
|
||||
super().__init__(*args)
|
||||
self.some_parameter = some_parameter
|
||||
|
||||
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
|
||||
tmp_file.write(
|
||||
f"""
|
||||
version: unstable
|
||||
components:
|
||||
- name: custom_node
|
||||
type: CustomNode
|
||||
params:
|
||||
base_parameter: 1
|
||||
some_parameter: value
|
||||
pipelines:
|
||||
- name: my_pipeline
|
||||
nodes:
|
||||
- name: custom_node
|
||||
inputs:
|
||||
- Query
|
||||
"""
|
||||
)
|
||||
with pytest.raises(PipelineSchemaError, match="variadic"):
|
||||
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
|
||||
|
||||
|
||||
def test_load_yaml_custom_component_with_variadic_kwargs(tmp_path):
|
||||
class BaseCustomNode(MockNode):
|
||||
def __init__(self, base_parameter: int):
|
||||
super().__init__()
|
||||
self.base_parameter = base_parameter
|
||||
|
||||
class CustomNode(BaseCustomNode):
|
||||
def __init__(self, some_parameter: str, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.some_parameter = some_parameter
|
||||
|
||||
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
|
||||
tmp_file.write(
|
||||
f"""
|
||||
version: unstable
|
||||
components:
|
||||
- name: custom_node
|
||||
type: CustomNode
|
||||
params:
|
||||
base_parameter: 1
|
||||
some_parameter: value
|
||||
pipelines:
|
||||
- name: my_pipeline
|
||||
nodes:
|
||||
- name: custom_node
|
||||
inputs:
|
||||
- Query
|
||||
"""
|
||||
)
|
||||
with pytest.raises(PipelineSchemaError, match="variadic"):
|
||||
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
|
||||
|
||||
|
||||
def test_load_yaml_no_pipelines(tmp_path):
|
||||
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
|
||||
tmp_file.write(
|
||||
|
||||
@ -192,9 +192,7 @@ def test_retribert_embedding(document_store, retriever, docs):
|
||||
if isinstance(document_store, WeaviateDocumentStore):
|
||||
# Weaviate sets the embedding dimension to 768 as soon as it is initialized.
|
||||
# We need 128 here and therefore initialize a new WeaviateDocumentStore.
|
||||
document_store = WeaviateDocumentStore(
|
||||
weaviate_url="http://localhost:8080", index="haystack_test", embedding_dim=128
|
||||
)
|
||||
document_store = WeaviateDocumentStore(index="haystack_test", embedding_dim=128)
|
||||
document_store.weaviate_client.schema.delete_all()
|
||||
document_store._create_schema_and_index_if_not_exist()
|
||||
document_store.return_embedding = True
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user