mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-25 22:46:21 +00:00
Ensure exact match when filtering by meta in Elasticsearch (#311)
This commit is contained in:
parent
4eeb7818af
commit
397dcf9d92
@ -94,22 +94,34 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
|||||||
self.update_existing_documents = update_existing_documents
|
self.update_existing_documents = update_existing_documents
|
||||||
|
|
||||||
def _create_document_index(self, index_name):
|
def _create_document_index(self, index_name):
|
||||||
|
if self.client.indices.exists(index=index_name):
|
||||||
|
return
|
||||||
|
|
||||||
if self.custom_mapping:
|
if self.custom_mapping:
|
||||||
mapping = self.custom_mapping
|
mapping = self.custom_mapping
|
||||||
else:
|
else:
|
||||||
mapping = {
|
mapping = {
|
||||||
"mappings": {
|
"mappings": {
|
||||||
"properties": {
|
"properties": {
|
||||||
self.name_field: {"type": "text"},
|
self.name_field: {"type": "keyword"},
|
||||||
self.text_field: {"type": "text"},
|
self.text_field: {"type": "text"},
|
||||||
}
|
},
|
||||||
|
"dynamic_templates": [
|
||||||
|
{
|
||||||
|
"strings": {
|
||||||
|
"path_match": "*",
|
||||||
|
"match_mapping_type": "string",
|
||||||
|
"mapping": {"type": "keyword"}}}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if self.embedding_field:
|
if self.embedding_field:
|
||||||
mapping["mappings"]["properties"][self.embedding_field] = {"type": "dense_vector", "dims": self.embedding_dim}
|
mapping["mappings"]["properties"][self.embedding_field] = {"type": "dense_vector", "dims": self.embedding_dim}
|
||||||
self.client.indices.create(index=index_name, ignore=400, body=mapping)
|
self.client.indices.create(index=index_name, body=mapping)
|
||||||
|
|
||||||
def _create_label_index(self, index_name):
|
def _create_label_index(self, index_name):
|
||||||
|
if self.client.indices.exists(index=index_name):
|
||||||
|
return
|
||||||
mapping = {
|
mapping = {
|
||||||
"mappings": {
|
"mappings": {
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -126,7 +138,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.client.indices.create(index=index_name, ignore=400, body=mapping)
|
self.client.indices.create(index=index_name, body=mapping)
|
||||||
|
|
||||||
# TODO: Add flexibility to define other non-meta and meta fields expected by the Document class
|
# TODO: Add flexibility to define other non-meta and meta fields expected by the Document class
|
||||||
def _create_document_field_map(self) -> Dict:
|
def _create_document_field_map(self) -> Dict:
|
||||||
@ -443,7 +455,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
|||||||
|
|
||||||
docs = self.get_all_documents(index)
|
docs = self.get_all_documents(index)
|
||||||
passages = [d.text for d in docs]
|
passages = [d.text for d in docs]
|
||||||
|
|
||||||
#TODO Index embeddings every X batches to avoid OOM for huge document collections
|
#TODO Index embeddings every X batches to avoid OOM for huge document collections
|
||||||
logger.info(f"Updating embeddings for {len(passages)} docs ...")
|
logger.info(f"Updating embeddings for {len(passages)} docs ...")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user