| 
									
										
										
										
											2023-01-24 10:01:39 +01:00
										 |  |  | import logging | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2023-06-01 18:47:24 +02:00
										 |  |  | from unittest.mock import MagicMock, patch | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | import numpy as np | 
					
						
							| 
									
										
										
										
											2023-01-24 10:01:39 +01:00
										 |  |  | import pytest | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  | from elasticsearch import Elasticsearch | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  | from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore, VERSION | 
					
						
							| 
									
										
										
										
											2023-02-16 09:43:25 +01:00
										 |  |  | from haystack.document_stores.es_converter import elasticsearch_index_to_document_store | 
					
						
							|  |  |  | from haystack.document_stores.memory import InMemoryDocumentStore | 
					
						
							|  |  |  | from haystack.nodes import PreProcessor | 
					
						
							| 
									
										
										
										
											2023-02-17 19:38:03 +01:00
										 |  |  | from haystack.testing import DocumentStoreBaseTestAbstract | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from .test_search_engine import SearchEngineDocumentStoreTestAbstract | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TestElasticsearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract): | 
					
						
							|  |  |  |     # Constants | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     index_name = __name__ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.fixture | 
					
						
							|  |  |  |     def ds(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |         This fixture provides a working document store and takes care of keeping clean | 
					
						
							|  |  |  |         the ES cluster used in the tests. | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         labels_index_name = f"{self.index_name}_labels" | 
					
						
							|  |  |  |         ds = ElasticsearchDocumentStore( | 
					
						
							|  |  |  |             index=self.index_name, | 
					
						
							|  |  |  |             label_index=labels_index_name, | 
					
						
							|  |  |  |             host=os.environ.get("ELASTICSEARCH_HOST", "localhost"), | 
					
						
							|  |  |  |             create_index=True, | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |             recreate_index=True, | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  |         yield ds | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-24 10:01:39 +01:00
										 |  |  |     @pytest.fixture | 
					
						
							|  |  |  |     def mocked_elastic_search_init(self, monkeypatch): | 
					
						
							|  |  |  |         mocked_init = MagicMock(return_value=None) | 
					
						
							|  |  |  |         monkeypatch.setattr(Elasticsearch, "__init__", mocked_init) | 
					
						
							|  |  |  |         return mocked_init | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.fixture | 
					
						
							|  |  |  |     def mocked_elastic_search_ping(self, monkeypatch): | 
					
						
							|  |  |  |         mocked_ping = MagicMock(return_value=True) | 
					
						
							|  |  |  |         monkeypatch.setattr(Elasticsearch, "ping", mocked_ping) | 
					
						
							|  |  |  |         return mocked_ping | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-09 11:58:23 +01:00
										 |  |  |     @pytest.fixture | 
					
						
							|  |  |  |     def mocked_document_store(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         The fixture provides an instance of a slightly customized | 
					
						
							|  |  |  |         ElasticsearchDocumentStore equipped with a mocked client | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-13 14:50:43 +02:00
										 |  |  |         with patch( | 
					
						
							|  |  |  |             f"{ElasticsearchDocumentStore.__module__}.ElasticsearchDocumentStore._init_elastic_client" | 
					
						
							|  |  |  |         ) as mocked_init_client: | 
					
						
							|  |  |  |             if VERSION[0] == 7: | 
					
						
							|  |  |  |                 mocked_init_client().info.return_value = {"version": {"number": "7.17.6"}} | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 mocked_init_client().info.return_value = {"version": {"number": "8.8.0"}} | 
					
						
							| 
									
										
										
										
											2023-01-09 11:58:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-30 11:55:52 +02:00
										 |  |  |             class DSMock(ElasticsearchDocumentStore): | 
					
						
							|  |  |  |                 # We mock a subclass to avoid messing up the actual class object | 
					
						
							|  |  |  |                 pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             DSMock.client = MagicMock() | 
					
						
							|  |  |  |             yield DSMock() | 
					
						
							| 
									
										
										
										
											2023-01-09 11:58:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test___init__(self): | 
					
						
							|  |  |  |         # defaults | 
					
						
							|  |  |  |         _ = ElasticsearchDocumentStore() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # list of hosts + single port | 
					
						
							|  |  |  |         _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=9200) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # list of hosts + list of ports (wrong) | 
					
						
							|  |  |  |         with pytest.raises(Exception): | 
					
						
							|  |  |  |             _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # list of hosts + list | 
					
						
							|  |  |  |         _ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200, 9200]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # only api_key | 
					
						
							|  |  |  |         with pytest.raises(Exception): | 
					
						
							|  |  |  |             _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # api_key +  id | 
					
						
							|  |  |  |         _ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_recreate_index(self, ds, documents, labels): | 
					
						
							|  |  |  |         ds.write_documents(documents) | 
					
						
							|  |  |  |         ds.write_labels(labels) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Create another document store on top of the previous one | 
					
						
							|  |  |  |         ds = ElasticsearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True) | 
					
						
							|  |  |  |         assert len(ds.get_all_documents(index=ds.index)) == 0 | 
					
						
							|  |  |  |         assert len(ds.get_all_labels(index=ds.label_index)) == 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_eq_filter(self, ds, documents): | 
					
						
							|  |  |  |         ds.write_documents(documents) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         filter = {"name": {"$eq": ["name_0"]}} | 
					
						
							|  |  |  |         filtered_docs = ds.get_all_documents(filters=filter) | 
					
						
							|  |  |  |         assert len(filtered_docs) == 3 | 
					
						
							|  |  |  |         for doc in filtered_docs: | 
					
						
							|  |  |  |             assert doc.meta["name"] == "name_0" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         filter = {"numbers": {"$eq": [2, 4]}} | 
					
						
							|  |  |  |         filtered_docs = ds.query(query=None, filters=filter) | 
					
						
							|  |  |  |         assert len(filtered_docs) == 3 | 
					
						
							|  |  |  |         for doc in filtered_docs: | 
					
						
							|  |  |  |             assert doc.meta["month"] == "01" | 
					
						
							|  |  |  |             assert doc.meta["numbers"] == [2, 4] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_custom_fields(self, ds): | 
					
						
							|  |  |  |         index = "haystack_test_custom" | 
					
						
							|  |  |  |         document_store = ElasticsearchDocumentStore( | 
					
						
							|  |  |  |             index=index, | 
					
						
							|  |  |  |             content_field="custom_text_field", | 
					
						
							|  |  |  |             embedding_field="custom_embedding_field", | 
					
						
							|  |  |  |             recreate_index=True, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         doc_to_write = {"custom_text_field": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)} | 
					
						
							|  |  |  |         document_store.write_documents([doc_to_write]) | 
					
						
							|  |  |  |         documents = document_store.get_all_documents(return_embedding=True) | 
					
						
							|  |  |  |         assert len(documents) == 1 | 
					
						
							|  |  |  |         assert documents[0].content == "test" | 
					
						
							|  |  |  |         np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding) | 
					
						
							|  |  |  |         document_store.delete_index(index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_query_with_filters_and_missing_embeddings(self, ds, documents): | 
					
						
							|  |  |  |         ds.write_documents(documents) | 
					
						
							|  |  |  |         filters = {"month": {"$in": ["01", "03"]}} | 
					
						
							|  |  |  |         ds.skip_missing_embeddings = False | 
					
						
							|  |  |  |         with pytest.raises(ds._RequestError): | 
					
						
							|  |  |  |             ds.query_by_embedding(np.random.rand(768), filters=filters) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ds.skip_missing_embeddings = True | 
					
						
							|  |  |  |         documents = ds.query_by_embedding(np.random.rand(768), filters=filters) | 
					
						
							|  |  |  |         assert len(documents) == 3 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_synonyms(self, ds): | 
					
						
							|  |  |  |         synonyms = ["i-pod, i pod, ipod", "sea biscuit, sea biscit, seabiscuit", "foo, foo bar, baz"] | 
					
						
							|  |  |  |         synonym_type = "synonym_graph" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         client = ds.client | 
					
						
							|  |  |  |         index = "haystack_synonym_arg" | 
					
						
							|  |  |  |         client.indices.delete(index=index, ignore=[404]) | 
					
						
							|  |  |  |         ElasticsearchDocumentStore(index=index, synonyms=synonyms, synonym_type=synonym_type) | 
					
						
							|  |  |  |         indexed_settings = client.indices.get_settings(index=index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         assert synonym_type == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["type"] | 
					
						
							|  |  |  |         assert synonyms == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["synonyms"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_search_field_mapping(self): | 
					
						
							|  |  |  |         index = "haystack_search_field_mapping" | 
					
						
							|  |  |  |         document_store = ElasticsearchDocumentStore( | 
					
						
							|  |  |  |             index=index, search_fields=["content", "sub_content"], content_field="title" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         document_store.write_documents( | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "title": "Green tea components", | 
					
						
							|  |  |  |                     "meta": { | 
					
						
							|  |  |  |                         "content": "The green tea plant contains a range of healthy compounds that make it into the final drink", | 
					
						
							|  |  |  |                         "sub_content": "Drink tip", | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     "id": "1", | 
					
						
							|  |  |  |                 }, | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "title": "Green tea catechin", | 
					
						
							|  |  |  |                     "meta": { | 
					
						
							|  |  |  |                         "content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).", | 
					
						
							|  |  |  |                         "sub_content": "Ingredients tip", | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     "id": "2", | 
					
						
							|  |  |  |                 }, | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "title": "Minerals in Green tea", | 
					
						
							|  |  |  |                     "meta": { | 
					
						
							|  |  |  |                         "content": "Green tea also has small amounts of minerals that can benefit your health.", | 
					
						
							|  |  |  |                         "sub_content": "Minerals tip", | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     "id": "3", | 
					
						
							|  |  |  |                 }, | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     "title": "Green tea Benefits", | 
					
						
							|  |  |  |                     "meta": { | 
					
						
							|  |  |  |                         "content": "Green tea does more than just keep you alert, it may also help boost brain function.", | 
					
						
							|  |  |  |                         "sub_content": "Health tip", | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     "id": "4", | 
					
						
							|  |  |  |                 }, | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         indexed_settings = document_store.client.indices.get_mapping(index=index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         assert indexed_settings[index]["mappings"]["properties"]["content"]["type"] == "text" | 
					
						
							|  |  |  |         assert indexed_settings[index]["mappings"]["properties"]["sub_content"]["type"] == "text" | 
					
						
							|  |  |  |         document_store.delete_index(index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_existing_alias(self, ds): | 
					
						
							|  |  |  |         client = ds.client | 
					
						
							|  |  |  |         client.indices.delete(index="haystack_existing_alias_1", ignore=[404]) | 
					
						
							|  |  |  |         client.indices.delete(index="haystack_existing_alias_2", ignore=[404]) | 
					
						
							|  |  |  |         client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         settings = {"mappings": {"properties": {"content": {"type": "text"}}}} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |         client.indices.create(index="haystack_existing_alias_1", **settings) | 
					
						
							|  |  |  |         client.indices.create(index="haystack_existing_alias_2", **settings) | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         client.indices.put_alias( | 
					
						
							|  |  |  |             index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # To be valid, all indices related to the alias must have content field of type text | 
					
						
							|  |  |  |         ElasticsearchDocumentStore(index="haystack_existing_alias", search_fields=["content"]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_existing_alias_missing_fields(self, ds): | 
					
						
							|  |  |  |         client = ds.client | 
					
						
							|  |  |  |         client.indices.delete(index="haystack_existing_alias_1", ignore=[404]) | 
					
						
							|  |  |  |         client.indices.delete(index="haystack_existing_alias_2", ignore=[404]) | 
					
						
							|  |  |  |         client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         right_settings = {"mappings": {"properties": {"content": {"type": "text"}}}} | 
					
						
							|  |  |  |         wrong_settings = {"mappings": {"properties": {"content": {"type": "histogram"}}}} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |         client.indices.create(index="haystack_existing_alias_1", **right_settings) | 
					
						
							|  |  |  |         client.indices.create(index="haystack_existing_alias_2", **wrong_settings) | 
					
						
							| 
									
										
										
										
											2022-10-31 15:30:14 +01:00
										 |  |  |         client.indices.put_alias( | 
					
						
							|  |  |  |             index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         with pytest.raises(Exception): | 
					
						
							|  |  |  |             # wrong field type for "content" in index "haystack_existing_alias_2" | 
					
						
							|  |  |  |             ElasticsearchDocumentStore( | 
					
						
							|  |  |  |                 index="haystack_existing_alias", search_fields=["content"], content_field="title" | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_get_document_count_only_documents_without_embedding_arg(self, ds, documents): | 
					
						
							|  |  |  |         ds.write_documents(documents) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         assert ds.get_document_count() == 9 | 
					
						
							|  |  |  |         assert ds.get_document_count(only_documents_without_embedding=True) == 3 | 
					
						
							|  |  |  |         assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["01"]}) == 0 | 
					
						
							|  |  |  |         assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["03"]}) == 3 | 
					
						
							| 
									
										
										
										
											2023-01-24 10:01:39 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-16 09:43:25 +01:00
										 |  |  |     @pytest.mark.integration | 
					
						
							|  |  |  |     def test_elasticsearch_brownfield_support(self, ds, documents): | 
					
						
							|  |  |  |         ds.write_documents(documents) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         new_document_store = elasticsearch_index_to_document_store( | 
					
						
							|  |  |  |             document_store=InMemoryDocumentStore(), | 
					
						
							|  |  |  |             original_index_name=ds.index, | 
					
						
							|  |  |  |             original_content_field="content", | 
					
						
							|  |  |  |             original_name_field="name", | 
					
						
							|  |  |  |             included_metadata_fields=["date_field"], | 
					
						
							|  |  |  |             index="test_brownfield_support", | 
					
						
							|  |  |  |             id_hash_keys=["content", "meta"], | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         original_documents = ds.get_all_documents() | 
					
						
							|  |  |  |         transferred_documents = new_document_store.get_all_documents(index="test_brownfield_support") | 
					
						
							|  |  |  |         assert len(original_documents) == len(transferred_documents) | 
					
						
							|  |  |  |         assert all("name" in doc.meta for doc in transferred_documents) | 
					
						
							|  |  |  |         assert all(doc.id == doc._get_id(["content", "meta"]) for doc in transferred_documents) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         original_content = set([doc.content for doc in original_documents]) | 
					
						
							|  |  |  |         transferred_content = set([doc.content for doc in transferred_documents]) | 
					
						
							|  |  |  |         assert original_content == transferred_content | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Test transferring docs with PreProcessor | 
					
						
							|  |  |  |         new_document_store = elasticsearch_index_to_document_store( | 
					
						
							|  |  |  |             document_store=InMemoryDocumentStore(), | 
					
						
							|  |  |  |             original_index_name=ds.index, | 
					
						
							|  |  |  |             original_content_field="content", | 
					
						
							|  |  |  |             excluded_metadata_fields=["date_field"], | 
					
						
							|  |  |  |             index="test_brownfield_support_2", | 
					
						
							|  |  |  |             preprocessor=PreProcessor(split_length=1, split_respect_sentence_boundary=False), | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         transferred_documents = new_document_store.get_all_documents(index="test_brownfield_support_2") | 
					
						
							|  |  |  |         assert all("name" in doc.meta for doc in transferred_documents) | 
					
						
							|  |  |  |         # Check if number of transferred_documents is equal to number of unique words. | 
					
						
							|  |  |  |         assert len(transferred_documents) == len(set(" ".join(original_content).split())) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 is not supported") | 
					
						
							| 
									
										
										
										
											2023-01-24 10:01:39 +01:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test__init_elastic_client_aws4auth_and_username_raises_warning( | 
					
						
							|  |  |  |         self, caplog, mocked_elastic_search_init, mocked_elastic_search_ping | 
					
						
							|  |  |  |     ): | 
					
						
							|  |  |  |         _init_client_remaining_kwargs = { | 
					
						
							|  |  |  |             "host": "host", | 
					
						
							|  |  |  |             "port": 443, | 
					
						
							|  |  |  |             "password": "pass", | 
					
						
							|  |  |  |             "api_key_id": None, | 
					
						
							|  |  |  |             "api_key": None, | 
					
						
							|  |  |  |             "scheme": "https", | 
					
						
							|  |  |  |             "ca_certs": None, | 
					
						
							|  |  |  |             "verify_certs": True, | 
					
						
							|  |  |  |             "timeout": 10, | 
					
						
							|  |  |  |             "use_system_proxy": False, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         with caplog.at_level(logging.WARN, logger="haystack.document_stores.elasticsearch"): | 
					
						
							|  |  |  |             ElasticsearchDocumentStore._init_elastic_client( | 
					
						
							|  |  |  |                 username="admin", aws4auth="foo", **_init_client_remaining_kwargs | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         assert len(caplog.records) == 1 | 
					
						
							|  |  |  |         for r in caplog.records: | 
					
						
							|  |  |  |             assert r.levelname == "WARNING" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         caplog.clear() | 
					
						
							|  |  |  |         with caplog.at_level(logging.WARN, logger="haystack.document_stores.elasticsearch"): | 
					
						
							|  |  |  |             ElasticsearchDocumentStore._init_elastic_client( | 
					
						
							|  |  |  |                 username=None, aws4auth="foo", **_init_client_remaining_kwargs | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             ElasticsearchDocumentStore._init_elastic_client( | 
					
						
							|  |  |  |                 username="", aws4auth="foo", **_init_client_remaining_kwargs | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         assert len(caplog.records) == 0 | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     def test_get_document_by_id_return_embedding_false_es7(self, mocked_document_store): | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.get_document_by_id("123") | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_document_by_id_return_embedding_false_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.get_document_by_id("123") | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     def test_get_document_by_id_excluded_meta_data_has_no_influence_es7(self, mocked_document_store): | 
					
						
							| 
									
										
										
										
											2023-04-18 15:40:17 +02:00
										 |  |  |         mocked_document_store.excluded_meta_data = ["foo"] | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.get_document_by_id("123") | 
					
						
							|  |  |  |         # assert the resulting body is not affected by the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["embedding"]} | 
					
						
							| 
									
										
										
										
											2023-06-01 18:47:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_document_by_id_excluded_meta_data_has_no_influence_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.excluded_meta_data = ["foo"] | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.get_document_by_id("123") | 
					
						
							|  |  |  |         # assert the resulting body is not affected by the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-01 18:47:24 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_write_documents_req_for_each_batch(self, mocked_document_store, documents): | 
					
						
							|  |  |  |         mocked_document_store.batch_size = 2 | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |         with patch(f"{ElasticsearchDocumentStore.__module__}.bulk") as mocked_bulk: | 
					
						
							| 
									
										
										
										
											2023-06-01 18:47:24 +02:00
										 |  |  |             mocked_document_store.write_documents(documents) | 
					
						
							|  |  |  |             assert mocked_bulk.call_count == 5 | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-13 14:50:43 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_vector_similarity_query(self, mocked_document_store): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Test that the source field of the vector similarity query is correctly formatted for ES 7.6 and above. | 
					
						
							|  |  |  |         We test this to make sure we use the correct syntax for newer ES versions. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         vec_sim_query = mocked_document_store._get_vector_similarity_query(np.random.rand(3).astype(np.float32), 10) | 
					
						
							|  |  |  |         assert vec_sim_query["script_score"]["script"]["source"] == "dotProduct(params.query_vector,'embedding') + 1000" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_vector_similarity_query_es_7_5_and_below(self, mocked_document_store): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Test that the source field of the vector similarity query is correctly formatter for ES 7.5 and below. | 
					
						
							|  |  |  |         We test this to make sure we use the correct syntax for ES versions older than 7.6, as the syntax changed | 
					
						
							|  |  |  |         in 7.6. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         # Patch server version to be 7.5.0 | 
					
						
							|  |  |  |         mocked_document_store.server_version = (7, 5, 0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         vec_sim_query = mocked_document_store._get_vector_similarity_query(np.random.rand(3).astype(np.float32), 10) | 
					
						
							|  |  |  |         assert ( | 
					
						
							|  |  |  |             vec_sim_query["script_score"]["script"]["source"] | 
					
						
							|  |  |  |             == "dotProduct(params.query_vector,doc['embedding']) + 1000" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |     # The following tests are overridden only to be able to skip them depending on ES version | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_return_embedding_true(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_get_all_documents_return_embedding_true(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_return_embedding_true_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.client.options().search.return_value = {} | 
					
						
							|  |  |  |         mocked_document_store.get_all_documents(return_embedding=True) | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert "_source" not in kwargs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_return_embedding_false(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_get_all_documents_return_embedding_false(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_return_embedding_false_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = True | 
					
						
							|  |  |  |         mocked_document_store.client.options().search.return_value = {} | 
					
						
							|  |  |  |         mocked_document_store.get_all_documents(return_embedding=False) | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         body = kwargs.get("body", kwargs) | 
					
						
							|  |  |  |         assert body["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							| 
									
										
										
										
											2023-06-29 16:40:10 +02:00
										 |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_excluded_meta_data_has_no_influence(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_get_all_documents_excluded_meta_data_has_no_influence(mocked_document_store) | 
					
						
							| 
									
										
										
										
											2023-07-10 16:03:50 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_all_documents_excluded_meta_data_has_no_influence_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.excluded_meta_data = ["foo"] | 
					
						
							|  |  |  |         mocked_document_store.client.options().search.return_value = {} | 
					
						
							|  |  |  |         mocked_document_store.get_all_documents(return_embedding=False) | 
					
						
							|  |  |  |         # assert the resulting body is not affected by the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         body = kwargs.get("body", kwargs) | 
					
						
							|  |  |  |         assert body["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_return_embedding_true(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_query_return_embedding_true(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_return_embedding_true_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = True | 
					
						
							|  |  |  |         mocked_document_store.query(self.query) | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert "_source" not in kwargs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_return_embedding_false(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_query_return_embedding_false(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_return_embedding_false_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.query(self.query) | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_excluded_meta_data_return_embedding_true(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_query_excluded_meta_data_return_embedding_true(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_excluded_meta_data_return_embedding_true_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = True | 
					
						
							|  |  |  |         mocked_document_store.excluded_meta_data = ["foo", "embedding"] | 
					
						
							|  |  |  |         mocked_document_store.query(self.query) | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         # we expect "embedding" was removed from the final query | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["foo"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_excluded_meta_data_return_embedding_false(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_query_excluded_meta_data_return_embedding_false(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_query_excluded_meta_data_return_embedding_false_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = False | 
					
						
							|  |  |  |         mocked_document_store.excluded_meta_data = ["foo"] | 
					
						
							|  |  |  |         mocked_document_store.query(self.query) | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert kwargs["_source"] == {"excludes": ["foo", "embedding"]} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 8, reason="Elasticsearch 8 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_document_by_id_return_embedding_true(self, mocked_document_store): | 
					
						
							|  |  |  |         super().test_get_document_by_id_return_embedding_true(mocked_document_store) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @pytest.mark.skipif(VERSION[0] == 7, reason="Elasticsearch 7 uses a different client call") | 
					
						
							|  |  |  |     @pytest.mark.unit | 
					
						
							|  |  |  |     def test_get_document_by_id_return_embedding_true_es8(self, mocked_document_store): | 
					
						
							|  |  |  |         mocked_document_store.return_embedding = True | 
					
						
							|  |  |  |         mocked_document_store.get_document_by_id("123") | 
					
						
							|  |  |  |         # assert the resulting body is consistent with the `excluded_meta_data` value | 
					
						
							|  |  |  |         _, kwargs = mocked_document_store.client.options().search.call_args | 
					
						
							|  |  |  |         assert "_source" not in kwargs |