| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | import numpy as np | 
					
						
							|  |  |  | import pytest | 
					
						
							| 
									
										
										
										
											2021-10-25 15:50:23 +02:00
										 |  |  | from haystack.schema import Document | 
					
						
							| 
									
										
										
										
											2022-03-15 11:17:26 +01:00
										 |  |  | from .conftest import get_document_store | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | import uuid | 
					
						
							|  |  |  | 
 | 
					
						
# Length of every random embedding vector generated in this module
# (both the stored test documents and the query vectors use it).
embedding_dim = 768
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-04 09:27:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | def get_uuid(): | 
					
						
							|  |  |  |     return str(uuid.uuid4()) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-04 09:27:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | DOCUMENTS = [ | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     {"content": "text1", "id": "not a correct uuid", "key": "a"}, | 
					
						
							|  |  |  |     {"content": "text2", "id": get_uuid(), "key": "b", "embedding": np.random.rand(embedding_dim).astype(np.float32)}, | 
					
						
							|  |  |  |     {"content": "text3", "id": get_uuid(), "key": "b", "embedding": np.random.rand(embedding_dim).astype(np.float32)}, | 
					
						
							|  |  |  |     {"content": "text4", "id": get_uuid(), "key": "b", "embedding": np.random.rand(embedding_dim).astype(np.float32)}, | 
					
						
							|  |  |  |     {"content": "text5", "id": get_uuid(), "key": "b", "embedding": np.random.rand(embedding_dim).astype(np.float32)}, | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | DOCUMENTS_XS = [ | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     # current "dict" format for a document | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         "content": "My name is Carla and I live in Berlin", | 
					
						
							|  |  |  |         "id": get_uuid(), | 
					
						
							|  |  |  |         "meta": {"metafield": "test1", "name": "filename1"}, | 
					
						
							|  |  |  |         "embedding": np.random.rand(embedding_dim).astype(np.float32), | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     # meta_field at the top level for backward compatibility | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         "content": "My name is Paul and I live in New York", | 
					
						
							|  |  |  |         "id": get_uuid(), | 
					
						
							|  |  |  |         "metafield": "test2", | 
					
						
							|  |  |  |         "name": "filename2", | 
					
						
							|  |  |  |         "embedding": np.random.rand(embedding_dim).astype(np.float32), | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     # Document object for a doc | 
					
						
							|  |  |  |     Document( | 
					
						
							|  |  |  |         content="My name is Christelle and I live in Paris", | 
					
						
							|  |  |  |         id=get_uuid(), | 
					
						
							|  |  |  |         meta={"metafield": "test3", "name": "filename3"}, | 
					
						
							|  |  |  |         embedding=np.random.rand(embedding_dim).astype(np.float32), | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  | ] | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-04 09:27:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | @pytest.fixture(params=["weaviate"]) | 
					
						
							| 
									
										
										
										
											2022-01-14 13:48:58 +01:00
										 |  |  | def document_store_with_docs(request, tmp_path): | 
					
						
							|  |  |  |     document_store = get_document_store(request.param, tmp_path=tmp_path) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     document_store.write_documents(DOCUMENTS_XS) | 
					
						
							|  |  |  |     yield document_store | 
					
						
							| 
									
										
										
										
											2022-04-26 19:06:30 +02:00
										 |  |  |     document_store.delete_index(document_store.index) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-04 09:27:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | @pytest.fixture(params=["weaviate"]) | 
					
						
							| 
									
										
										
										
											2022-01-14 13:48:58 +01:00
										 |  |  | def document_store(request, tmp_path): | 
					
						
							|  |  |  |     document_store = get_document_store(request.param, tmp_path=tmp_path) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     yield document_store | 
					
						
							| 
									
										
										
										
											2022-04-26 19:06:30 +02:00
										 |  |  |     document_store.delete_index(document_store.index) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.mark.weaviate | 
					
						
							|  |  |  | @pytest.mark.parametrize("document_store", ["weaviate"], indirect=True) | 
					
						
							|  |  |  | @pytest.mark.parametrize("batch_size", [2]) | 
					
						
							|  |  |  | def test_weaviate_write_docs(document_store, batch_size): | 
					
						
							|  |  |  |     # Write in small batches | 
					
						
							|  |  |  |     for i in range(0, len(DOCUMENTS), batch_size): | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |         document_store.write_documents(DOCUMENTS[i : i + batch_size]) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | 
 | 
					
						
							|  |  |  |     documents_indexed = document_store.get_all_documents() | 
					
						
							|  |  |  |     assert len(documents_indexed) == len(DOCUMENTS) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     documents_indexed = document_store.get_all_documents(batch_size=batch_size) | 
					
						
							|  |  |  |     assert len(documents_indexed) == len(DOCUMENTS) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.mark.weaviate | 
					
						
							|  |  |  | @pytest.mark.parametrize("document_store_with_docs", ["weaviate"], indirect=True) | 
					
						
							|  |  |  | def test_query_by_embedding(document_store_with_docs): | 
					
						
							|  |  |  |     docs = document_store_with_docs.query_by_embedding(np.random.rand(embedding_dim).astype(np.float32)) | 
					
						
							|  |  |  |     assert len(docs) == 3 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     docs = document_store_with_docs.query_by_embedding(np.random.rand(embedding_dim).astype(np.float32), top_k=1) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     assert len(docs) == 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     docs = document_store_with_docs.query_by_embedding( | 
					
						
							|  |  |  |         np.random.rand(embedding_dim).astype(np.float32), filters={"name": ["filename2"]} | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     assert len(docs) == 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  | @pytest.mark.weaviate | 
					
						
							|  |  |  | @pytest.mark.parametrize("document_store_with_docs", ["weaviate"], indirect=True) | 
					
						
							|  |  |  | def test_query(document_store_with_docs): | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     query_text = "My name is Carla and I live in Berlin" | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     with pytest.raises(Exception): | 
					
						
							|  |  |  |         docs = document_store_with_docs.query(query_text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     docs = document_store_with_docs.query(filters={"name": ["filename2"]}) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     assert len(docs) == 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     docs = document_store_with_docs.query(filters={"content": [query_text.lower()]}) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     assert len(docs) == 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-03 13:43:18 +01:00
										 |  |  |     docs = document_store_with_docs.query(filters={"content": ["live"]}) | 
					
						
							| 
									
										
										
										
											2021-06-10 13:13:53 +05:30
										 |  |  |     assert len(docs) == 3 |