mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-30 17:29:29 +00:00 
			
		
		
		
	Ignore cross-reference properties when loading documents (#4664)
* drop cross-reference properties
* be more defensive
* fix regression
This commit is contained in:
		
							parent
							
								
									dbe3049682
								
							
						
					
					
						commit
						a03e8335aa
					
				| @ -28,6 +28,20 @@ from haystack.nodes.retriever import DenseRetriever | |||||||
| 
 | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| UUID_PATTERN = re.compile(r"^[\da-f]{8}-([\da-f]{4}-){3}[\da-f]{12}$", re.IGNORECASE) | UUID_PATTERN = re.compile(r"^[\da-f]{8}-([\da-f]{4}-){3}[\da-f]{12}$", re.IGNORECASE) | ||||||
|  | SUPPORTED_PROPERTY_TYPES = { | ||||||
|  |     "string", | ||||||
|  |     "string[]", | ||||||
|  |     "int", | ||||||
|  |     "int[]", | ||||||
|  |     "boolean", | ||||||
|  |     "boolean[]", | ||||||
|  |     "number", | ||||||
|  |     "number[]", | ||||||
|  |     "date", | ||||||
|  |     "date[]", | ||||||
|  |     "text", | ||||||
|  |     "text[]", | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class WeaviateDocumentStoreError(DocumentStoreError): | class WeaviateDocumentStoreError(DocumentStoreError): | ||||||
| @ -391,13 +405,21 @@ class WeaviateDocumentStore(KeywordDocumentStore): | |||||||
| 
 | 
 | ||||||
|     def _get_current_properties(self, index: Optional[str] = None) -> List[str]: |     def _get_current_properties(self, index: Optional[str] = None) -> List[str]: | ||||||
|         """ |         """ | ||||||
|         Get all the existing properties in the schema. |         Get all the existing properties in the schema, excluding those with complex types | ||||||
|  |         like cross-reference | ||||||
|         """ |         """ | ||||||
|         index = self._sanitize_index_name(index) or self.index |         index = self._sanitize_index_name(index) or self.index | ||||||
|         cur_properties = [] |         cur_properties = [] | ||||||
|         for class_item in self.weaviate_client.schema.get()["classes"]: |         for class_item in self.weaviate_client.schema.get()["classes"]: | ||||||
|             if class_item["class"] == index: |             if class_item["class"] == index: | ||||||
|                 cur_properties = [item["name"] for item in class_item["properties"]] |                 cur_properties = [ | ||||||
|  |                     item["name"] | ||||||
|  |                     for item in class_item.get("properties", []) | ||||||
|  |                     # dataType should be always there and contain only one item unless | ||||||
|  |                     # it's a cross-reference but here we try to be defensive against | ||||||
|  |                     # unexpected schemas | ||||||
|  |                     if set(item.get("dataType", [])).issubset(SUPPORTED_PROPERTY_TYPES) | ||||||
|  |                 ] | ||||||
| 
 | 
 | ||||||
|         return cur_properties |         return cur_properties | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,16 +1,14 @@ | |||||||
|  | import uuid | ||||||
|  | import json | ||||||
|  | from unittest import mock | ||||||
|  | 
 | ||||||
| import pytest | import pytest | ||||||
|  | import numpy as np | ||||||
| 
 | 
 | ||||||
| from haystack.document_stores.weaviate import WeaviateDocumentStore | from haystack.document_stores.weaviate import WeaviateDocumentStore | ||||||
| from haystack.schema import Document | from haystack.schema import Document | ||||||
| from haystack.testing import DocumentStoreBaseTestAbstract | from haystack.testing import DocumentStoreBaseTestAbstract | ||||||
| 
 | 
 | ||||||
| import uuid |  | ||||||
| from unittest.mock import MagicMock |  | ||||||
| 
 |  | ||||||
| import numpy as np |  | ||||||
| import pytest |  | ||||||
| 
 |  | ||||||
| from haystack.schema import Document |  | ||||||
| 
 | 
 | ||||||
| embedding_dim = 768 | embedding_dim = 768 | ||||||
| 
 | 
 | ||||||
| @ -207,7 +205,7 @@ class TestWeaviateDocumentStore(DocumentStoreBaseTestAbstract): | |||||||
|         ds.write_documents(documents) |         ds.write_documents(documents) | ||||||
|         # This test verifies that deleting an object by its ID does not first require fetching all documents. This fixes |         # This test verifies that deleting an object by its ID does not first require fetching all documents. This fixes | ||||||
|         # a bug, as described in https://github.com/deepset-ai/haystack/issues/2898 |         # a bug, as described in https://github.com/deepset-ai/haystack/issues/2898 | ||||||
|         ds.get_all_documents = MagicMock(wraps=ds.get_all_documents) |         ds.get_all_documents = mock.MagicMock(wraps=ds.get_all_documents) | ||||||
| 
 | 
 | ||||||
|         assert ds.get_document_count() == 9 |         assert ds.get_document_count() == 9 | ||||||
| 
 | 
 | ||||||
| @ -258,3 +256,30 @@ class TestWeaviateDocumentStore(DocumentStoreBaseTestAbstract): | |||||||
|         """ |         """ | ||||||
|         ds.write_documents(documents) |         ds.write_documents(documents) | ||||||
|         assert ds.get_embedding_count() == 9 |         assert ds.get_embedding_count() == 9 | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.unit | ||||||
|  |     def test__get_current_properties(self): | ||||||
|  |         with mock.patch("haystack.document_stores.weaviate.client") as mocked_client: | ||||||
|  |             mocked_client.Client().is_ready.return_value = True | ||||||
|  |             mocked_client.Client().schema.contains.return_value = False | ||||||
|  |             mocked_client.Client().schema.get.return_value = json.loads( | ||||||
|  |                 """ | ||||||
|  | { | ||||||
|  |   "classes": [{ | ||||||
|  |     "class": "Document", | ||||||
|  |     "properties": [ | ||||||
|  |         { | ||||||
|  |         "name": "hasWritten", | ||||||
|  |         "dataType": ["Article"] | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |         "name": "hitCounter", | ||||||
|  |         "dataType": ["int"] | ||||||
|  |         } | ||||||
|  |     ] | ||||||
|  |   }] | ||||||
|  | } """ | ||||||
|  |             ) | ||||||
|  |             ds = WeaviateDocumentStore() | ||||||
|  |             # Ensure we dropped the cross-reference property | ||||||
|  |             assert ds._get_current_properties() == ["hitCounter"] | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Massimiliano Pippi
						Massimiliano Pippi