mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	 7498c7c6fb
			
		
	
	
		7498c7c6fb
		
			
		
	
	
	
	
		
			
			* use delete_index instead of delete_documents in tests * fix delete_index * fix delete_index() in memory and milvus * fix imports * fix memory keyerrors * Update Documentation & Code Style * increase timeout for pinecone tests to 60 minutes * clean get_document_store() * use recreate_index in tests * Update Documentation & Code Style * fix tests * fix remaining tests * log index deleted * fix test_eval_pipeline * simplify existing index detection in weaviate * delete label_index on recreate_index for pinecone and milvus * Update Documentation & Code Style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
		
			
				
	
	
		
			108 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			108 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import numpy as np
 | |
| import pytest
 | |
| from haystack.schema import Document
 | |
| from .conftest import get_document_store
 | |
| import uuid
 | |
| 
 | |
# Length of every random embedding vector generated in this module
# (document embeddings as well as query embeddings).
embedding_dim: int = 768
 | |
| 
 | |
| 
 | |
def get_uuid():
    """Return a newly generated random UUID (``uuid.uuid4``) as a string."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
 | |
| 
 | |
| 
 | |
# Five dict-style documents used by the write test.
# The first one deliberately carries an id that is not a valid UUID and has no
# embedding; the remaining four ("text2" .. "text5") each get a fresh UUID and
# a random float32 embedding of length ``embedding_dim``.
DOCUMENTS = [
    {"content": "text1", "id": "not a correct uuid", "key": "a"},
    *(
        {
            "content": f"text{number}",
            "id": get_uuid(),
            "key": "b",
            "embedding": np.random.rand(embedding_dim).astype(np.float32),
        }
        for number in range(2, 6)
    ),
]
 | |
| 
 | |
def _random_embedding():
    """Return a random float32 vector of length ``embedding_dim``."""
    return np.random.rand(embedding_dim).astype(np.float32)


# Three documents, each expressed in a different accepted input shape.
DOCUMENTS_XS = [
    # 1) current "dict" format: metadata nested under "meta"
    {
        "content": "My name is Carla and I live in Berlin",
        "id": get_uuid(),
        "meta": {"metafield": "test1", "name": "filename1"},
        "embedding": _random_embedding(),
    },
    # 2) metadata fields at the top level, kept for backward compatibility
    {
        "content": "My name is Paul and I live in New York",
        "id": get_uuid(),
        "metafield": "test2",
        "name": "filename2",
        "embedding": _random_embedding(),
    },
    # 3) a ready-made Document object
    Document(
        content="My name is Christelle and I live in Paris",
        id=get_uuid(),
        meta={"metafield": "test3", "name": "filename3"},
        embedding=_random_embedding(),
    ),
]
 | |
| 
 | |
| 
 | |
@pytest.fixture(params=["weaviate"])
def document_store_with_docs(request, tmp_path):
    """Yield a document store pre-populated with ``DOCUMENTS_XS``.

    The store is created through ``get_document_store`` for the requested
    backend parameter. After the test finishes, the store's index is deleted.
    """
    store = get_document_store(request.param, tmp_path=tmp_path)
    store.write_documents(DOCUMENTS_XS)
    yield store
    # Teardown: remove the index this test worked on.
    store.delete_index(store.index)
 | |
| 
 | |
| 
 | |
@pytest.fixture(params=["weaviate"])
def document_store(request, tmp_path):
    """Yield a document store with no documents written by this fixture.

    The store is created through ``get_document_store`` for the requested
    backend parameter. After the test finishes, the store's index is deleted.
    """
    store = get_document_store(request.param, tmp_path=tmp_path)
    yield store
    # Teardown: remove the index this test worked on.
    store.delete_index(store.index)
 | |
| 
 | |
| 
 | |
@pytest.mark.weaviate
@pytest.mark.parametrize("document_store", ["weaviate"], indirect=True)
@pytest.mark.parametrize("batch_size", [2])
def test_weaviate_write_docs(document_store, batch_size):
    """Write ``DOCUMENTS`` in slices of ``batch_size`` and check all are retrievable."""
    expected_count = len(DOCUMENTS)

    # Write in small batches
    for start in range(0, expected_count, batch_size):
        chunk = DOCUMENTS[start : start + batch_size]
        document_store.write_documents(chunk)

    # Retrieval with the default batch size ...
    fetched_default = document_store.get_all_documents()
    assert len(fetched_default) == expected_count

    # ... and with the same batch size that was used for writing.
    fetched_batched = document_store.get_all_documents(batch_size=batch_size)
    assert len(fetched_batched) == expected_count
 | |
| 
 | |
| 
 | |
@pytest.mark.weaviate
@pytest.mark.parametrize("document_store_with_docs", ["weaviate"], indirect=True)
def test_query_by_embedding(document_store_with_docs):
    """Query the pre-populated store with random embeddings and check result counts."""

    def random_query_vector():
        # Fresh random float32 vector of length embedding_dim for each query.
        return np.random.rand(embedding_dim).astype(np.float32)

    store = document_store_with_docs

    # No top_k and no filters: three results are expected.
    unrestricted = store.query_by_embedding(random_query_vector())
    assert len(unrestricted) == 3

    # top_k=1 limits the result to a single document.
    best_match = store.query_by_embedding(random_query_vector(), top_k=1)
    assert len(best_match) == 1

    # Filtering on the "name" field leaves exactly one document.
    by_name = store.query_by_embedding(random_query_vector(), filters={"name": ["filename2"]})
    assert len(by_name) == 1
 | |
| 
 | |
| 
 | |
@pytest.mark.weaviate
@pytest.mark.parametrize("document_store_with_docs", ["weaviate"], indirect=True)
def test_query(document_store_with_docs):
    """Check ``query`` on the pre-populated store: text query raises, filters work."""
    store = document_store_with_docs
    query_text = "My name is Carla and I live in Berlin"

    # Passing the text as the positional query argument is expected to raise.
    with pytest.raises(Exception):
        store.query(query_text)

    # Filter on a metadata field.
    name_filter = {"name": ["filename2"]}
    assert len(store.query(filters=name_filter)) == 1

    # Filter on the content field using the lower-cased full sentence.
    full_content_filter = {"content": [query_text.lower()]}
    assert len(store.query(filters=full_content_filter)) == 1

    # Filter on the content field using a single word; three documents expected.
    word_filter = {"content": ["live"]}
    assert len(store.query(filters=word_filter)) == 3
 |