mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-03 19:29:32 +00:00
Ensure exact match when filtering by meta in Elasticsearch (#311)
This commit is contained in:
parent
4eeb7818af
commit
397dcf9d92
@ -94,22 +94,34 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
self.update_existing_documents = update_existing_documents
|
||||
|
||||
def _create_document_index(self, index_name):
    """Create the Elasticsearch index that stores documents, if it is missing.

    When ``self.custom_mapping`` is set it is sent to Elasticsearch as-is.
    Otherwise a default mapping is built from the configured field names.

    :param index_name: Name of the index to create.
    :return: None. Returns early, without touching the index, when an index
             called ``index_name`` already exists.
    """
    # An existing index keeps whatever mapping it was created with.
    if self.client.indices.exists(index=index_name):
        return

    if self.custom_mapping:
        mapping = self.custom_mapping
    else:
        mapping = {
            "mappings": {
                "properties": {
                    # "keyword" fields are not analyzed, so filters on them
                    # match the exact stored value.
                    self.name_field: {"type": "keyword"},
                    self.text_field: {"type": "text"},
                },
                # Any string field not listed above (path_match "*") is
                # mapped as "keyword" as well.
                "dynamic_templates": [
                    {
                        "strings": {
                            "path_match": "*",
                            "match_mapping_type": "string",
                            "mapping": {"type": "keyword"},
                        }
                    }
                ],
            }
        }
        # NOTE(review): nesting reconstructed from a flattened diff; the
        # embedding field is only added to the default mapping here so a
        # caller-supplied custom mapping is never mutated - confirm against
        # the upstream file.
        if self.embedding_field:
            mapping["mappings"]["properties"][self.embedding_field] = {
                "type": "dense_vector",
                "dims": self.embedding_dim,
            }

    # Single create call without ignore=400: existence was checked above, so
    # an error response here is no longer suppressed by the call itself.
    self.client.indices.create(index=index_name, body=mapping)
|
||||
|
||||
def _create_label_index(self, index_name):
|
||||
if self.client.indices.exists(index=index_name):
|
||||
return
|
||||
mapping = {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
@ -126,7 +138,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
}
|
||||
}
|
||||
}
|
||||
self.client.indices.create(index=index_name, ignore=400, body=mapping)
|
||||
self.client.indices.create(index=index_name, body=mapping)
|
||||
|
||||
# TODO: Add flexibility to define other non-meta and meta fields expected by the Document class
|
||||
def _create_document_field_map(self) -> Dict:
|
||||
@ -443,7 +455,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
|
||||
|
||||
docs = self.get_all_documents(index)
|
||||
passages = [d.text for d in docs]
|
||||
|
||||
|
||||
#TODO Index embeddings every X batches to avoid OOM for huge document collections
|
||||
logger.info(f"Updating embeddings for {len(passages)} docs ...")
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user