Forbid usage of *args and **kwargs in any node's __init__ (#2362)

* Add failing test

* Remove `**kwargs` from docstores' `__init__` functions (#2407)

* Remove kwargs from ESDocStore subclasses

* Remove kwargs from subclasses of SQLDocumentStore

* Remove kwargs from Weaviate

* Revert change in pinecone

* Fix tests

* Fix retriever test with Weaviate

* Change Exception into DocumentStoreError

* Update Documentation & Code Style

* Remove `**kwargs` from `FARMReader` (#2413)

* Remove FARMReader kwargs without trying to replace them functionally

* Update Documentation & Code Style

* enforce same index values before and after saving/loading eval dataframes (#2398)

* Add tests for missing `__init__` and `super().__init__()` in custom nodes (#2350)

* Add tests for missing init and super

* Update Documentation & Code Style

* Change `in` check to `endswith`

* Move test in pipeline.py and change test in pipeline_yaml.py

* Update Documentation & Code Style

* Use caplog to test the warning

* Update Documentation & Code Style

* Move tests into test_pipeline and use get_config

* Update Documentation & Code Style

* Unmock version name

* Improve variadic args test

* Update Documentation & Code Style

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Sara Zan 2022-04-14 16:42:02 +02:00 committed by GitHub
parent 46a50fb979
commit 929c685cda
17 changed files with 4849 additions and 75 deletions
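The thrust of the change: custom nodes must now declare every constructor parameter explicitly so that a JSON schema can be generated for them. A minimal sketch of the pattern that is now rejected versus the one that is accepted (class and parameter names are illustrative, not taken from this commit):

```python
from haystack.nodes.base import BaseComponent


class ExplicitParamsNode(BaseComponent):
    """Accepted: every __init__ parameter is declared explicitly."""

    outgoing_edges = 1

    def __init__(self, some_parameter: str, base_parameter: int = 1):
        super().__init__()
        self.some_parameter = some_parameter
        self.base_parameter = base_parameter

    def run(self, **kwargs):  # variadic parameters are only forbidden in __init__
        return {"some_parameter": self.some_parameter}, "output_1"


class VariadicParamsNode(ExplicitParamsNode):
    """Rejected: **kwargs hides the real signature, so loading a YAML pipeline
    that references this node raises PipelineSchemaError."""

    def __init__(self, some_parameter: str, **kwargs):
        super().__init__(some_parameter=some_parameter, **kwargs)
```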

View File

@ -414,7 +414,7 @@ class ElasticsearchDocumentStore(KeywordDocumentStore)
#### \_\_init\_\_
```python
def __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity="dot_product", timeout=30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
def __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity: str = "dot_product", timeout: int = 30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
```
A DocumentStore using Elasticsearch to store and query the documents for our search.
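For orientation, a minimal instantiation under the updated signature; the values shown are the documented defaults, and `similarity` and `timeout` are now explicitly typed:

```python
from haystack.document_stores import ElasticsearchDocumentStore

document_store = ElasticsearchDocumentStore(
    host="localhost",
    port=9200,
    similarity="dot_product",  # now typed as str
    timeout=30,                # now typed as int
)
```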
@ -1231,7 +1231,7 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore)
#### \_\_init\_\_
```python
def __init__(verify_certs=False, scheme="https", username="admin", password="admin", port=9200, **kwargs)
def __init__(scheme: str = "https", username: str = "admin", password: str = "admin", host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", ca_certs: Optional[str] = None, verify_certs: bool = False, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity: str = "dot_product", timeout: int = 30, return_embedding: bool = False, duplicate_documents: str = "overwrite", index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym", use_system_proxy: bool = False)
```
Document Store using OpenSearch (https://opensearch.org/). It is compatible with the AWS Elasticsearch Service.
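Because the subclass now spells out and forwards every parameter instead of relying on `**kwargs`, a default instance can still be created without any arguments; a minimal sketch using the defaults shown above:

```python
from haystack.document_stores import OpenSearchDocumentStore

# Defaults differ from the Elasticsearch parent: "https" scheme,
# "admin"/"admin" credentials, and verify_certs=False.
document_store = OpenSearchDocumentStore()
```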
@ -2235,7 +2235,7 @@ the vector embeddings are indexed in a FAISS Index.
#### \_\_init\_\_
```python
def __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, **kwargs, ,)
def __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = None, embedding_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, n_links: int = 64, ef_search: int = 20, ef_construction: int = 80)
```
**Arguments**:
@ -2282,6 +2282,9 @@ If specified no other params besides faiss_config_path must be specified.
- `faiss_config_path`: Stored FAISS initial configuration parameters.
Can be created via calling `save()`
- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
- `n_links`: used only if index_factory == "HNSW"
- `ef_search`: used only if index_factory == "HNSW"
- `ef_construction`: used only if index_factory == "HNSW"
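With `**kwargs` gone, the HNSW tuning values are plain keyword arguments; a minimal sketch using the defaults listed above:

```python
from haystack.document_stores import FAISSDocumentStore

document_store = FAISSDocumentStore(
    faiss_index_factory_str="HNSW",  # n_links / ef_* only take effect for HNSW
    n_links=64,
    ef_search=20,
    ef_construction=80,
)
```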
<a id="faiss.FAISSDocumentStore.write_documents"></a>
@ -2545,7 +2548,7 @@ Usage:
#### \_\_init\_\_
```python
def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None, **kwargs, ,)
def __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = None, embedding_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", isolation_level: str = None)
```
**Arguments**:
@ -3168,7 +3171,7 @@ The current implementation is not supporting the storage of labels, so you canno
#### \_\_init\_\_
```python
def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int, List[int]] = 8080, timeout_config: tuple = (5, 15), username: str = None, password: str = None, index: str = "Document", embedding_dim: int = 768, content_field: str = "content", name_field: str = "name", similarity: str = "cosine", index_type: str = "hnsw", custom_schema: Optional[dict] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite", **kwargs, ,)
def __init__(host: Union[str, List[str]] = "http://localhost", port: Union[int, List[int]] = 8080, timeout_config: tuple = (5, 15), username: str = None, password: str = None, index: str = "Document", embedding_dim: int = 768, content_field: str = "content", name_field: str = "name", similarity: str = "cosine", index_type: str = "hnsw", custom_schema: Optional[dict] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = "overwrite")
```
**Arguments**:

View File

@ -55,7 +55,7 @@ While the underlying model can vary (BERT, Roberta, DistilBERT, ...), the interf
#### \_\_init\_\_
```python
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None, **kwargs)
def __init__(model_name_or_path: str, model_version: Optional[str] = None, context_window_size: int = 150, batch_size: int = 50, use_gpu: bool = True, devices: List[torch.device] = [], no_ans_boost: float = 0.0, return_no_answer: bool = False, top_k: int = 10, top_k_per_candidate: int = 3, top_k_per_sample: int = 1, num_processes: Optional[int] = None, max_seq_len: int = 256, doc_stride: int = 128, progress_bar: bool = True, duplicate_filtering: int = 0, use_confidence_scores: bool = True, confidence_threshold: Optional[float] = None, proxies: Optional[Dict[str, str]] = None, local_files_only=False, force_download=False, use_auth_token: Optional[Union[str, bool]] = None)
```
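With `**kwargs` removed, extra keyword arguments are no longer passed through to the underlying FARM `Inferencer`; a minimal construction sketch under the new signature (the model name is illustrative):

```python
from haystack.nodes import FARMReader

reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=False,
    top_k=5,
)
```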
**Arguments**:

View File

@ -23,6 +23,7 @@ from haystack.document_stores import KeywordDocumentStore
from haystack.schema import Document, Label
from haystack.document_stores.base import get_batches_from_generator
from haystack.document_stores.filter_utils import LogicalFilterClause
from haystack.errors import DocumentStoreError
logger = logging.getLogger(__name__)
@ -54,8 +55,8 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
recreate_index: bool = False,
create_index: bool = True,
refresh_type: str = "wait_for",
similarity="dot_product",
timeout=30,
similarity: str = "dot_product",
timeout: int = 30,
return_embedding: bool = False,
duplicate_documents: str = "overwrite",
index_type: str = "flat",
@ -179,9 +180,9 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
self.scroll = scroll
self.skip_missing_embeddings: bool = skip_missing_embeddings
if similarity in ["cosine", "dot_product", "l2"]:
self.similarity = similarity
self.similarity: str = similarity
else:
raise Exception(
raise DocumentStoreError(
f"Invalid value {similarity} for similarity in ElasticSearchDocumentStore constructor. Choose between 'cosine', 'l2' and 'dot_product'"
)
if index_type in ["flat", "hnsw"]:
@ -1592,7 +1593,42 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
class OpenSearchDocumentStore(ElasticsearchDocumentStore):
def __init__(self, verify_certs=False, scheme="https", username="admin", password="admin", port=9200, **kwargs):
def __init__(
self,
scheme: str = "https", # Mind this different default param
username: str = "admin", # Mind this different default param
password: str = "admin", # Mind this different default param
host: Union[str, List[str]] = "localhost",
port: Union[int, List[int]] = 9200,
api_key_id: Optional[str] = None,
api_key: Optional[str] = None,
aws4auth=None,
index: str = "document",
label_index: str = "label",
search_fields: Union[str, list] = "content",
content_field: str = "content",
name_field: str = "name",
embedding_field: str = "embedding",
embedding_dim: int = 768,
custom_mapping: Optional[dict] = None,
excluded_meta_data: Optional[list] = None,
analyzer: str = "standard",
ca_certs: Optional[str] = None,
verify_certs: bool = False, # Mind this different default param
recreate_index: bool = False,
create_index: bool = True,
refresh_type: str = "wait_for",
similarity: str = "dot_product",
timeout: int = 30,
return_embedding: bool = False,
duplicate_documents: str = "overwrite",
index_type: str = "flat",
scroll: str = "1d",
skip_missing_embeddings: bool = True,
synonyms: Optional[List] = None,
synonym_type: str = "synonym",
use_system_proxy: bool = False,
):
"""
Document Store using OpenSearch (https://opensearch.org/). It is compatible with the AWS Elasticsearch Service.
@ -1662,14 +1698,44 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
Synonym or Synonym_graph to handle synonyms, including multi-word synonyms correctly during the analysis process.
More info at https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-synonym-graph-tokenfilter.html
"""
super().__init__(
scheme=scheme,
username=username,
password=password,
host=host,
port=port,
api_key_id=api_key_id,
api_key=api_key,
aws4auth=aws4auth,
index=index,
label_index=label_index,
search_fields=search_fields,
content_field=content_field,
name_field=name_field,
embedding_field=embedding_field,
embedding_dim=embedding_dim,
custom_mapping=custom_mapping,
excluded_meta_data=excluded_meta_data,
analyzer=analyzer,
ca_certs=ca_certs,
verify_certs=verify_certs,
recreate_index=recreate_index,
create_index=create_index,
refresh_type=refresh_type,
similarity=similarity,
timeout=timeout,
return_embedding=return_embedding,
duplicate_documents=duplicate_documents,
index_type=index_type,
scroll=scroll,
skip_missing_embeddings=skip_missing_embeddings,
synonyms=synonyms,
synonym_type=synonym_type,
use_system_proxy=use_system_proxy,
)
self.embeddings_field_supports_similarity = False
self.similarity_to_space_type = {"cosine": "cosinesimil", "dot_product": "innerproduct", "l2": "l2"}
self.space_type_to_similarity = {v: k for k, v in self.similarity_to_space_type.items()}
# Overwrite default kwarg values of parent class so that in default cases we can initialize
# an OpenSearchDocumentStore without providing any arguments
super(OpenSearchDocumentStore, self).__init__(
verify_certs=verify_certs, scheme=scheme, username=username, password=password, port=port, **kwargs
)
def query_by_embedding(
self,
@ -1914,7 +1980,7 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
if not self.client.indices.exists(index=index_name, headers=headers):
raise e
def _get_embedding_field_mapping(self, similarity: Optional[str]):
def _get_embedding_field_mapping(self, similarity: str):
space_type = self.similarity_to_space_type[similarity]
method: dict = {"space_type": space_type, "name": "hnsw", "engine": "nmslib"}
@ -2049,10 +2115,79 @@ class OpenDistroElasticsearchDocumentStore(OpenSearchDocumentStore):
A DocumentStore which has an Open Distro for Elasticsearch service behind it.
"""
def __init__(self, similarity="cosine", **kwargs):
def __init__(
self,
scheme: str = "https",
username: str = "admin",
password: str = "admin",
host: Union[str, List[str]] = "localhost",
port: Union[int, List[int]] = 9200,
api_key_id: Optional[str] = None,
api_key: Optional[str] = None,
aws4auth=None,
index: str = "document",
label_index: str = "label",
search_fields: Union[str, list] = "content",
content_field: str = "content",
name_field: str = "name",
embedding_field: str = "embedding",
embedding_dim: int = 768,
custom_mapping: Optional[dict] = None,
excluded_meta_data: Optional[list] = None,
analyzer: str = "standard",
ca_certs: Optional[str] = None,
verify_certs: bool = False,
recreate_index: bool = False,
create_index: bool = True,
refresh_type: str = "wait_for",
similarity: str = "cosine", # Mind this different default param
timeout: int = 30,
return_embedding: bool = False,
duplicate_documents: str = "overwrite",
index_type: str = "flat",
scroll: str = "1d",
skip_missing_embeddings: bool = True,
synonyms: Optional[List] = None,
synonym_type: str = "synonym",
use_system_proxy: bool = False,
):
logger.warning(
"Open Distro for Elasticsearch has been replaced by OpenSearch! "
"See https://opensearch.org/faq/ for details. "
"We recommend using the OpenSearchDocumentStore instead."
)
super(OpenDistroElasticsearchDocumentStore, self).__init__(similarity=similarity, **kwargs)
super().__init__(
scheme=scheme,
username=username,
password=password,
host=host,
port=port,
api_key_id=api_key_id,
api_key=api_key,
aws4auth=aws4auth,
index=index,
label_index=label_index,
search_fields=search_fields,
content_field=content_field,
name_field=name_field,
embedding_field=embedding_field,
embedding_dim=embedding_dim,
custom_mapping=custom_mapping,
excluded_meta_data=excluded_meta_data,
analyzer=analyzer,
ca_certs=ca_certs,
verify_certs=verify_certs,
recreate_index=recreate_index,
create_index=create_index,
refresh_type=refresh_type,
similarity=similarity,
timeout=timeout,
return_embedding=return_embedding,
duplicate_documents=duplicate_documents,
index_type=index_type,
scroll=scroll,
skip_missing_embeddings=skip_missing_embeddings,
synonyms=synonyms,
synonym_type=synonym_type,
use_system_proxy=use_system_proxy,
)
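Given the deprecation warning above, migrating is essentially a rename: aside from the warning, the subclass only changes the `similarity` default. A hedged sketch of the equivalent call, assuming the shared connection defaults:

```python
from haystack.document_stores import OpenSearchDocumentStore

# Equivalent to a default OpenDistroElasticsearchDocumentStore,
# which forwards everything to this parent class with similarity="cosine".
document_store = OpenSearchDocumentStore(similarity="cosine")
```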

View File

@ -57,7 +57,9 @@ class FAISSDocumentStore(SQLDocumentStore):
faiss_index_path: Union[str, Path] = None,
faiss_config_path: Union[str, Path] = None,
isolation_level: str = None,
**kwargs,
n_links: int = 64,
ef_search: int = 20,
ef_construction: int = 80,
):
"""
:param sql_url: SQL connection URL for database. It defaults to local file based SQLite DB. For large scale
@ -102,12 +104,15 @@ class FAISSDocumentStore(SQLDocumentStore):
:param faiss_config_path: Stored FAISS initial configuration parameters.
Can be created via calling `save()`
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
:param n_links: used only if index_factory == "HNSW"
:param ef_search: used only if index_factory == "HNSW"
:param ef_construction: used only if index_factory == "HNSW"
"""
# special case if we want to load an existing index from disk
# load init params from disk and run init again
if faiss_index_path is not None:
sig = signature(self.__class__.__init__)
self._validate_params_load_from_disk(sig, locals(), kwargs)
self._validate_params_load_from_disk(sig, locals())
init_params = self._load_init_params_from_config(faiss_index_path, faiss_config_path)
self.__class__.__init__(self, **init_params) # pylint: disable=non-parent-init-called
return
@ -141,7 +146,9 @@ class FAISSDocumentStore(SQLDocumentStore):
embedding_dim=self.embedding_dim,
index_factory=faiss_index_factory_str,
metric_type=self.metric_type,
**kwargs,
n_links=n_links,
ef_search=ef_search,
ef_construction=ef_construction,
)
self.return_embedding = return_embedding
@ -155,8 +162,8 @@ class FAISSDocumentStore(SQLDocumentStore):
self._validate_index_sync()
def _validate_params_load_from_disk(self, sig: Signature, locals: dict, kwargs: dict):
allowed_params = ["faiss_index_path", "faiss_config_path", "self", "kwargs"]
def _validate_params_load_from_disk(self, sig: Signature, locals: dict):
allowed_params = ["faiss_index_path", "faiss_config_path", "self"]
invalid_param_set = False
for param in sig.parameters.values():
@ -164,7 +171,7 @@ class FAISSDocumentStore(SQLDocumentStore):
invalid_param_set = True
break
if invalid_param_set or len(kwargs) > 0:
if invalid_param_set:
raise ValueError("if faiss_index_path is passed no other params besides faiss_config_path are allowed.")
def _validate_index_sync(self):
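For context, the `faiss_index_path` constraint validated above matches the documented load-from-disk flow; a hedged sketch, with illustrative file names:

```python
from haystack.document_stores import FAISSDocumentStore

# Persist an existing store; save() also writes the init-params config file.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")
document_store.save("my_faiss_index.faiss")

# Reload later: besides faiss_index_path, only faiss_config_path may be passed.
reloaded = FAISSDocumentStore(faiss_index_path="my_faiss_index.faiss")
```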
@ -179,14 +186,21 @@ class FAISSDocumentStore(SQLDocumentStore):
"was used when creating the original index."
)
def _create_new_index(self, embedding_dim: int, metric_type, index_factory: str = "Flat", **kwargs):
def _create_new_index(
self,
embedding_dim: int,
metric_type,
index_factory: str = "Flat",
n_links: int = 64,
ef_search: int = 20,
ef_construction: int = 80,
):
if index_factory == "HNSW":
# faiss index factory doesn't give the same results for HNSW IP, therefore direct init.
# defaults here are similar to DPR codebase (good accuracy, but very high RAM consumption)
n_links = kwargs.get("n_links", 64)
index = faiss.IndexHNSWFlat(embedding_dim, n_links, metric_type)
index.hnsw.efSearch = kwargs.get("efSearch", 20) # 20
index.hnsw.efConstruction = kwargs.get("efConstruction", 80) # 80
index.hnsw.efSearch = ef_search
index.hnsw.efConstruction = ef_construction
if "ivf" in index_factory.lower(): # enable reconstruction of vectors for inverted index
self.faiss_indexes[index].set_direct_map_type(faiss.DirectMap.Hashtable)

View File

@ -60,7 +60,6 @@ class Milvus1DocumentStore(SQLDocumentStore):
progress_bar: bool = True,
duplicate_documents: str = "overwrite",
isolation_level: str = None,
**kwargs,
):
"""
:param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale
@ -106,7 +105,9 @@ class Milvus1DocumentStore(SQLDocumentStore):
exists.
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
"""
super().__init__()
super().__init__(
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
)
self.milvus_server = Milvus(uri=milvus_url, pool=connection_pool)
@ -141,10 +142,6 @@ class Milvus1DocumentStore(SQLDocumentStore):
self.embedding_field = embedding_field
self.progress_bar = progress_bar
super().__init__(
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
)
def __del__(self):
return self.milvus_server.close()

View File

@ -126,7 +126,9 @@ class Milvus2DocumentStore(SQLDocumentStore):
exists.
:param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
"""
super().__init__()
super().__init__(
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
)
connections.add_connection(default={"host": host, "port": port})
connections.connect()
@ -171,10 +173,6 @@ class Milvus2DocumentStore(SQLDocumentStore):
self.return_embedding = return_embedding
self.progress_bar = progress_bar
super().__init__(
url=sql_url, index=index, duplicate_documents=duplicate_documents, isolation_level=isolation_level
)
def _create_collection_and_index_if_not_exist(
self, index: Optional[str] = None, consistency_level: int = 0, index_param: Optional[Dict[str, Any]] = None
):

View File

@ -81,7 +81,6 @@ class PineconeDocumentStore(SQLDocumentStore):
- `"overwrite"`: Update any existing documents with the same ID when adding documents.
- `"fail"`: An error is raised if the document ID of the document being added already exists.
"""
# Connect to Pinecone server using python client binding
pinecone.init(api_key=api_key, environment=environment)
self._api_key = api_key
@ -129,8 +128,6 @@ class PineconeDocumentStore(SQLDocumentStore):
super().__init__(url=sql_url, index=clean_index, duplicate_documents=duplicate_documents)
# self._validate_index_sync()
def _sanitize_index_name(self, index: str) -> str:
return index.replace("_", "-").lower()

View File

@ -70,7 +70,6 @@ class WeaviateDocumentStore(BaseDocumentStore):
embedding_field: str = "embedding",
progress_bar: bool = True,
duplicate_documents: str = "overwrite",
**kwargs,
):
"""
:param host: Weaviate server connection URL for storing and processing documents and vectors.

View File

@ -15,9 +15,6 @@
},
{
"const": "1.3.0"
},
{
"const": "1.3.1rc0"
}
]
},

File diff suppressed because it is too large.

View File

@ -13,12 +13,6 @@
{
"const": "unstable"
},
{
"const": "1.2.1rc0"
},
{
"const": "1.3.0"
},
{
"const": "1.3.1rc0"
}
@ -470,11 +464,13 @@
},
"similarity": {
"title": "Similarity",
"default": "dot_product"
"default": "dot_product",
"type": "string"
},
"timeout": {
"title": "Timeout",
"default": 30
"default": 30,
"type": "integer"
},
"return_embedding": {
"title": "Return Embedding",
@ -626,6 +622,21 @@
"isolation_level": {
"title": "Isolation Level",
"type": "string"
},
"n_links": {
"title": "N Links",
"default": 64,
"type": "integer"
},
"ef_search": {
"title": "Ef Search",
"default": 20,
"type": "integer"
},
"ef_construction": {
"title": "Ef Construction",
"default": 80,
"type": "integer"
}
},
"additionalProperties": false,
@ -918,9 +929,192 @@
"title": "Parameters",
"type": "object",
"properties": {
"scheme": {
"title": "Scheme",
"default": "https",
"type": "string"
},
"username": {
"title": "Username",
"default": "admin",
"type": "string"
},
"password": {
"title": "Password",
"default": "admin",
"type": "string"
},
"host": {
"title": "Host",
"default": "localhost",
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"port": {
"title": "Port",
"default": 9200,
"anyOf": [
{
"type": "integer"
},
{
"type": "array",
"items": {
"type": "integer"
}
}
]
},
"api_key_id": {
"title": "Api Key Id",
"type": "string"
},
"api_key": {
"title": "Api Key",
"type": "string"
},
"aws4auth": {
"title": "Aws4Auth"
},
"index": {
"title": "Index",
"default": "document",
"type": "string"
},
"label_index": {
"title": "Label Index",
"default": "label",
"type": "string"
},
"search_fields": {
"title": "Search Fields",
"default": "content",
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {}
}
]
},
"content_field": {
"title": "Content Field",
"default": "content",
"type": "string"
},
"name_field": {
"title": "Name Field",
"default": "name",
"type": "string"
},
"embedding_field": {
"title": "Embedding Field",
"default": "embedding",
"type": "string"
},
"embedding_dim": {
"title": "Embedding Dim",
"default": 768,
"type": "integer"
},
"custom_mapping": {
"title": "Custom Mapping",
"type": "object"
},
"excluded_meta_data": {
"title": "Excluded Meta Data",
"type": "array",
"items": {}
},
"analyzer": {
"title": "Analyzer",
"default": "standard",
"type": "string"
},
"ca_certs": {
"title": "Ca Certs",
"type": "string"
},
"verify_certs": {
"title": "Verify Certs",
"default": false,
"type": "boolean"
},
"recreate_index": {
"title": "Recreate Index",
"default": false,
"type": "boolean"
},
"create_index": {
"title": "Create Index",
"default": true,
"type": "boolean"
},
"refresh_type": {
"title": "Refresh Type",
"default": "wait_for",
"type": "string"
},
"similarity": {
"title": "Similarity",
"default": "cosine"
"default": "cosine",
"type": "string"
},
"timeout": {
"title": "Timeout",
"default": 30,
"type": "integer"
},
"return_embedding": {
"title": "Return Embedding",
"default": false,
"type": "boolean"
},
"duplicate_documents": {
"title": "Duplicate Documents",
"default": "overwrite",
"type": "string"
},
"index_type": {
"title": "Index Type",
"default": "flat",
"type": "string"
},
"scroll": {
"title": "Scroll",
"default": "1d",
"type": "string"
},
"skip_missing_embeddings": {
"title": "Skip Missing Embeddings",
"default": true,
"type": "boolean"
},
"synonyms": {
"title": "Synonyms",
"type": "array",
"items": {}
},
"synonym_type": {
"title": "Synonym Type",
"default": "synonym",
"type": "string"
},
"use_system_proxy": {
"title": "Use System Proxy",
"default": false,
"type": "boolean"
}
},
"additionalProperties": false,
@ -951,25 +1145,192 @@
"title": "Parameters",
"type": "object",
"properties": {
"verify_certs": {
"title": "Verify Certs",
"default": false
},
"scheme": {
"title": "Scheme",
"default": "https"
"default": "https",
"type": "string"
},
"username": {
"title": "Username",
"default": "admin"
"default": "admin",
"type": "string"
},
"password": {
"title": "Password",
"default": "admin"
"default": "admin",
"type": "string"
},
"host": {
"title": "Host",
"default": "localhost",
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"port": {
"title": "Port",
"default": 9200
"default": 9200,
"anyOf": [
{
"type": "integer"
},
{
"type": "array",
"items": {
"type": "integer"
}
}
]
},
"api_key_id": {
"title": "Api Key Id",
"type": "string"
},
"api_key": {
"title": "Api Key",
"type": "string"
},
"aws4auth": {
"title": "Aws4Auth"
},
"index": {
"title": "Index",
"default": "document",
"type": "string"
},
"label_index": {
"title": "Label Index",
"default": "label",
"type": "string"
},
"search_fields": {
"title": "Search Fields",
"default": "content",
"anyOf": [
{
"type": "string"
},
{
"type": "array",
"items": {}
}
]
},
"content_field": {
"title": "Content Field",
"default": "content",
"type": "string"
},
"name_field": {
"title": "Name Field",
"default": "name",
"type": "string"
},
"embedding_field": {
"title": "Embedding Field",
"default": "embedding",
"type": "string"
},
"embedding_dim": {
"title": "Embedding Dim",
"default": 768,
"type": "integer"
},
"custom_mapping": {
"title": "Custom Mapping",
"type": "object"
},
"excluded_meta_data": {
"title": "Excluded Meta Data",
"type": "array",
"items": {}
},
"analyzer": {
"title": "Analyzer",
"default": "standard",
"type": "string"
},
"ca_certs": {
"title": "Ca Certs",
"type": "string"
},
"verify_certs": {
"title": "Verify Certs",
"default": false,
"type": "boolean"
},
"recreate_index": {
"title": "Recreate Index",
"default": false,
"type": "boolean"
},
"create_index": {
"title": "Create Index",
"default": true,
"type": "boolean"
},
"refresh_type": {
"title": "Refresh Type",
"default": "wait_for",
"type": "string"
},
"similarity": {
"title": "Similarity",
"default": "dot_product",
"type": "string"
},
"timeout": {
"title": "Timeout",
"default": 30,
"type": "integer"
},
"return_embedding": {
"title": "Return Embedding",
"default": false,
"type": "boolean"
},
"duplicate_documents": {
"title": "Duplicate Documents",
"default": "overwrite",
"type": "string"
},
"index_type": {
"title": "Index Type",
"default": "flat",
"type": "string"
},
"scroll": {
"title": "Scroll",
"default": "1d",
"type": "string"
},
"skip_missing_embeddings": {
"title": "Skip Missing Embeddings",
"default": true,
"type": "boolean"
},
"synonyms": {
"title": "Synonyms",
"type": "array",
"items": {}
},
"synonym_type": {
"title": "Synonym Type",
"default": "synonym",
"type": "string"
},
"use_system_proxy": {
"title": "Use System Proxy",
"default": false,
"type": "boolean"
}
},
"additionalProperties": false,

View File

@ -58,9 +58,6 @@
},
{
"const": "1.3.0"
},
{
"const": "1.3.1rc0"
}
]
}
@ -70,6 +67,24 @@
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.2.1rc0.schema.json"
}
]
},
{
"allOf": [
{
"properties": {
"version": {
"oneOf": [
{
"const": "1.3.1rc0"
}
]
}
}
},
{
"$ref": "https://raw.githubusercontent.com/deepset-ai/haystack/master/json-schemas/haystack-pipeline-1.3.1rc0.schema.json"
}
]
}
]
}

View File

@ -154,6 +154,13 @@ def create_schema_for_node_class(node_class: Type[BaseComponent]) -> Tuple[Dict[
raise PipelineSchemaError(f"Could not read the __init__ method of {node_name} to create its schema.")
signature = get_typed_signature(init_method)
# Check for variadic parameters (*args or **kwargs) and raise an exception if found
if any(param.kind in {param.VAR_POSITIONAL, param.VAR_KEYWORD} for param in signature.parameters.values()):
raise PipelineSchemaError(
"Nodes cannot use variadic parameters like *args or **kwargs in their __init__ function."
)
param_fields = [
param for param in signature.parameters.values() if param.kind not in {param.VAR_POSITIONAL, param.VAR_KEYWORD}
]

View File

@ -62,7 +62,6 @@ class FARMReader(BaseReader):
local_files_only=False,
force_download=False,
use_auth_token: Optional[Union[str, bool]] = None,
**kwargs,
):
"""
@ -140,7 +139,6 @@ class FARMReader(BaseReader):
force_download=force_download,
devices=self.devices,
use_auth_token=use_auth_token,
**kwargs,
)
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
self.inferencer.model.prediction_heads[0].no_ans_boost = no_ans_boost

View File

@ -842,9 +842,7 @@ def get_document_store(
)
elif document_store_type == "weaviate":
document_store = WeaviateDocumentStore(
weaviate_url="http://localhost:8080", index=index, similarity=similarity, embedding_dim=embedding_dim
)
document_store = WeaviateDocumentStore(index=index, similarity=similarity, embedding_dim=embedding_dim)
document_store.weaviate_client.schema.delete_all()
document_store._create_schema_and_index_if_not_exist()

View File

@ -667,6 +667,72 @@ def test_load_yaml_custom_component_with_superclass(tmp_path):
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
def test_load_yaml_custom_component_with_variadic_args(tmp_path):
class BaseCustomNode(MockNode):
def __init__(self, base_parameter: int):
super().__init__()
self.base_parameter = base_parameter
class CustomNode(BaseCustomNode):
def __init__(self, some_parameter: str, *args):
super().__init__(*args)
self.some_parameter = some_parameter
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
tmp_file.write(
f"""
version: unstable
components:
- name: custom_node
type: CustomNode
params:
base_parameter: 1
some_parameter: value
pipelines:
- name: my_pipeline
nodes:
- name: custom_node
inputs:
- Query
"""
)
with pytest.raises(PipelineSchemaError, match="variadic"):
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
def test_load_yaml_custom_component_with_variadic_kwargs(tmp_path):
class BaseCustomNode(MockNode):
def __init__(self, base_parameter: int):
super().__init__()
self.base_parameter = base_parameter
class CustomNode(BaseCustomNode):
def __init__(self, some_parameter: str, **kwargs):
super().__init__(**kwargs)
self.some_parameter = some_parameter
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
tmp_file.write(
f"""
version: unstable
components:
- name: custom_node
type: CustomNode
params:
base_parameter: 1
some_parameter: value
pipelines:
- name: my_pipeline
nodes:
- name: custom_node
inputs:
- Query
"""
)
with pytest.raises(PipelineSchemaError, match="variadic"):
Pipeline.load_from_yaml(path=tmp_path / "tmp_config.yml")
def test_load_yaml_no_pipelines(tmp_path):
with open(tmp_path / "tmp_config.yml", "w") as tmp_file:
tmp_file.write(

View File

@ -192,9 +192,7 @@ def test_retribert_embedding(document_store, retriever, docs):
if isinstance(document_store, WeaviateDocumentStore):
# Weaviate sets the embedding dimension to 768 as soon as it is initialized.
# We need 128 here and therefore initialize a new WeaviateDocumentStore.
document_store = WeaviateDocumentStore(
weaviate_url="http://localhost:8080", index="haystack_test", embedding_dim=128
)
document_store = WeaviateDocumentStore(index="haystack_test", embedding_dim=128)
document_store.weaviate_client.schema.delete_all()
document_store._create_schema_and_index_if_not_exist()
document_store.return_embedding = True