diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index dc0eb21c2..93dc818e1 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -239,7 +239,7 @@ class ElasticsearchDocumentStore(KeywordDocumentStore)
#### \_\_init\_\_
```python
- | __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, create_index: bool = True, refresh_type: str = "wait_for", similarity="dot_product", timeout=30, return_embedding: bool = False, duplicate_documents: str = 'overwrite', index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym")
+ | __init__(host: Union[str, List[str]] = "localhost", port: Union[int, List[int]] = 9200, username: str = "", password: str = "", api_key_id: Optional[str] = None, api_key: Optional[str] = None, aws4auth=None, index: str = "document", label_index: str = "label", search_fields: Union[str, list] = "content", content_field: str = "content", name_field: str = "name", embedding_field: str = "embedding", embedding_dim: int = 768, custom_mapping: Optional[dict] = None, excluded_meta_data: Optional[list] = None, analyzer: str = "standard", scheme: str = "http", ca_certs: Optional[str] = None, verify_certs: bool = True, recreate_index: bool = False, create_index: bool = True, refresh_type: str = "wait_for", similarity="dot_product", timeout=30, return_embedding: bool = False, duplicate_documents: str = 'overwrite', index_type: str = "flat", scroll: str = "1d", skip_missing_embeddings: bool = True, synonyms: Optional[List] = None, synonym_type: str = "synonym")
```
A DocumentStore using Elasticsearch to store and query the documents for our search.
@@ -274,7 +274,16 @@ A DocumentStore using Elasticsearch to store and query the documents for our sea
- `scheme`: 'https' or 'http', protocol used to connect to your elasticsearch instance
- `ca_certs`: Root certificates for SSL: it is a path to certificate authority (CA) certs on disk. You can use certifi package with certifi.where() to find where the CA certs file is located in your machine.
- `verify_certs`: Whether to be strict about ca certificates
-- `create_index`: Whether to try creating a new index (If the index of that name is already existing, we will just continue in any case
+- `recreate_index`: If set to True, an existing elasticsearch index will be deleted and a new one will be
+ created using the config you are using for initialization. Be aware that all data in the old index will be
+ lost if you choose to recreate the index, and that both the document_index and the label_index will
+ be recreated.
+- `create_index`:
+ Whether to try creating a new index (If the index of that name is already existing, we will just continue in any case)
+ .. deprecated:: 2.0
+ This param is deprecated. In the next major version we will always try to create an index if there is no
+ existing index (the current behaviour when create_index=True). If you are looking to recreate an
+ existing index by deleting it first if it already exists, use param recreate_index.
- `refresh_type`: Type of ES refresh used to control when changes made by a request (e.g. bulk) are made visible to search.
If set to 'wait_for', continue only after changes are visible (slow, but safe).
If set to 'false', continue directly (fast, but sometimes unintuitive behaviour when docs are not immediately available after ingestion).
@@ -708,6 +717,23 @@ Delete labels in an index. All labels are deleted if no filters are passed.
None
+
+#### delete\_index
+
+```python
+ | delete_index(index: str)
+```
+
+Delete an existing elasticsearch index. The index including all data will be removed.
+
+**Arguments**:
+
+- `index`: The name of the index to delete.
+
+**Returns**:
+
+None
+
## OpenSearchDocumentStore
diff --git a/docs/_src/tutorials/tutorials/7.md b/docs/_src/tutorials/tutorials/7.md
index 36b426a4b..f49d87160 100644
--- a/docs/_src/tutorials/tutorials/7.md
+++ b/docs/_src/tutorials/tutorials/7.md
@@ -170,6 +170,27 @@ These are used to condition the generator as it generates the answer.
What it should return then are novel text spans that form and answer to your question!
+```python
+# Now generate an answer for each question
+for question in QUESTIONS:
+ # Retrieve related documents from retriever
+ retriever_results = retriever.retrieve(
+ query=question
+ )
+
+ # Now generate an answer from the question and the retrieved documents
+ predicted_result = generator.predict(
+ query=question,
+ documents=retriever_results,
+ top_k=1
+ )
+
+ # Print your answer
+ answers = predicted_result["answers"]
+ print(f'Generated answer is \'{answers[0].answer}\' for the question = \'{question}\'')
+```
+
+
```python
# Or alternatively use the Pipeline class
from haystack.pipelines import GenerativeQAPipeline
diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py
index c4d67d2b3..075a5333b 100644
--- a/haystack/document_stores/elasticsearch.py
+++ b/haystack/document_stores/elasticsearch.py
@@ -50,6 +50,7 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
scheme: str = "http",
ca_certs: Optional[str] = None,
verify_certs: bool = True,
+ recreate_index: bool = False,
create_index: bool = True,
refresh_type: str = "wait_for",
similarity="dot_product",
@@ -93,7 +94,16 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
:param scheme: 'https' or 'http', protocol used to connect to your elasticsearch instance
:param ca_certs: Root certificates for SSL: it is a path to certificate authority (CA) certs on disk. You can use certifi package with certifi.where() to find where the CA certs file is located in your machine.
:param verify_certs: Whether to be strict about ca certificates
- :param create_index: Whether to try creating a new index (If the index of that name is already existing, we will just continue in any case
+ :param recreate_index: If set to True, an existing elasticsearch index will be deleted and a new one will be
+ created using the config you are using for initialization. Be aware that all data in the old index will be
+ lost if you choose to recreate the index, and that both the document_index and the label_index will
+ be recreated.
+ :param create_index:
+ Whether to try creating a new index (If the index of that name is already existing, we will just continue in any case)
+ .. deprecated:: 2.0
+ This param is deprecated. In the next major version we will always try to create an index if there is no
+ existing index (the current behaviour when create_index=True). If you are looking to recreate an
+ existing index by deleting it first if it already exists, use param recreate_index.
:param refresh_type: Type of ES refresh used to control when changes made by a request (e.g. bulk) are made visible to search.
If set to 'wait_for', continue only after changes are visible (slow, but safe).
If set to 'false', continue directly (fast, but sometimes unintuitive behaviour when docs are not immediately available after ingestion).
@@ -175,6 +185,12 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
if index_type == "hnsw" and type(self) == ElasticsearchDocumentStore:
raise Exception("The HNSW algorithm for approximate nearest neighbours calculation is currently not available in the ElasticSearchDocumentStore. "
"Try the OpenSearchDocumentStore instead.")
+        if recreate_index:  # drop both indices first, then rebuild them from the current init config
+            self.delete_index(index)
+            self.delete_index(label_index)
+            self._create_document_index(index)
+            self._create_label_index(label_index)  # must target the label index, not the document index
+
if create_index:
self._create_document_index(index)
self._create_label_index(label_index)
@@ -1243,6 +1259,16 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
index = index or self.label_index
self.delete_documents(index=index, ids=ids, filters=filters, headers=headers)
+    def delete_index(self, index: str):
+        """
+        Drop an elasticsearch index together with all the data stored in it.
+
+        :param index: Name of the index that should be dropped.
+        :return: None
+        """
+        self.client.indices.delete(ignore=[400, 404], index=index)  # 400/404 are handed to the client as ignorable statuses
+        logger.debug(f'deleted elasticsearch index {index}')
+
class OpenSearchDocumentStore(ElasticsearchDocumentStore):
"""
diff --git a/test/test_document_store.py b/test/test_document_store.py
index cefe9123d..6219ae77f 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -53,6 +53,32 @@ def test_init_elastic_client():
_ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test")
+@pytest.mark.elasticsearch
+def test_init_elastic_doc_store_with_index_recreation():
+    """Re-initializing a store with recreate_index=True must leave both indices empty."""
+    doc_index, lbl_index = 'test_index_recreation', 'test_index_recreation_labels'
+    store_kwargs = {"index": doc_index, "label_index": lbl_index}
+    # Seed both indices through a regular store so that there is data to lose.
+    seeded_store = ElasticsearchDocumentStore(**store_kwargs)
+    seed_doc = Document(content="Doc1")
+    seed_label = Label(
+        query='query',
+        document=seed_doc,
+        is_correct_document=True,
+        is_correct_answer=False,
+        origin='user-feedback',
+        answer=None
+    )
+    seeded_store.write_documents([seed_doc], index=doc_index)
+    seeded_store.write_labels([seed_label], index=lbl_index)
+    # A second store on the same index names is asked to recreate them during init.
+    fresh_store = ElasticsearchDocumentStore(recreate_index=True, **store_kwargs)
+    remaining_docs = fresh_store.get_all_documents(index=doc_index)
+    remaining_labels = fresh_store.get_all_labels(index=lbl_index)
+    assert len(remaining_docs) == 0
+    assert len(remaining_labels) == 0
+
+
def test_write_with_duplicate_doc_ids(document_store):
duplicate_documents = [
Document(
@@ -910,6 +936,24 @@ def test_elasticsearch_custom_fields():
np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
+@pytest.mark.elasticsearch
+def test_elasticsearch_delete_index():
+    """
+    An index that exists after the store is initialized must be gone once
+    delete_index() has been called for it.
+    """
+    es_client = Elasticsearch()
+    target_index = "haystack_test_deletion"
+    store = ElasticsearchDocumentStore(index=target_index)
+
+    # Checked through a separate raw client: the index should exist at this point.
+    assert es_client.indices.exists(index=target_index)
+
+    store.delete_index(target_index)
+    # The same raw client must no longer see the index after deletion.
+    assert not es_client.indices.exists(index=target_index)
+
+
@pytest.mark.elasticsearch
def test_get_document_count_only_documents_without_embedding_arg():
documents = [