Ensure exact match when filtering by meta in Elasticsearch (#311)

This commit is contained in:
Tanay Soni 2020-08-13 11:42:49 +02:00 committed by GitHub
parent 4eeb7818af
commit 397dcf9d92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -94,22 +94,34 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
self.update_existing_documents = update_existing_documents
def _create_document_index(self, index_name):
if self.client.indices.exists(index=index_name):
return
if self.custom_mapping:
mapping = self.custom_mapping
else:
mapping = {
"mappings": {
"properties": {
self.name_field: {"type": "text"},
self.name_field: {"type": "keyword"},
self.text_field: {"type": "text"},
}
},
"dynamic_templates": [
{
"strings": {
"path_match": "*",
"match_mapping_type": "string",
"mapping": {"type": "keyword"}}}
],
}
}
if self.embedding_field:
mapping["mappings"]["properties"][self.embedding_field] = {"type": "dense_vector", "dims": self.embedding_dim}
self.client.indices.create(index=index_name, ignore=400, body=mapping)
self.client.indices.create(index=index_name, body=mapping)
def _create_label_index(self, index_name):
if self.client.indices.exists(index=index_name):
return
mapping = {
"mappings": {
"properties": {
@ -126,7 +138,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
}
}
}
self.client.indices.create(index=index_name, ignore=400, body=mapping)
self.client.indices.create(index=index_name, body=mapping)
# TODO: Add flexibility to define other non-meta and meta fields expected by the Document class
def _create_document_field_map(self) -> Dict:
@ -443,7 +455,7 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
docs = self.get_all_documents(index)
passages = [d.text for d in docs]
#TODO Index embeddings every X batches to avoid OOM for huge document collections
logger.info(f"Updating embeddings for {len(passages)} docs ...")