mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-28 23:48:53 +00:00
Fixed the Search Field mapping in ElasticSearch DocumentStore (#2080)
* Review changes * Added the synonym analyser for search fields * Added the review requests. * Added the synonyms to the OpenSearchDocumentStore and review requests.
This commit is contained in:
parent
bbb65a19bd
commit
7d769d8bf1
@ -281,7 +281,7 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
|
||||
"mappings": {
|
||||
"properties": {
|
||||
self.name_field: {"type": "keyword"},
|
||||
self.content_field: {"type": "text"},
|
||||
self.content_field: {"type": "text"}
|
||||
},
|
||||
"dynamic_templates": [
|
||||
{
|
||||
@ -301,13 +301,21 @@ class ElasticsearchDocumentStore(KeywordDocumentStore):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.synonyms:
|
||||
for field in self.search_fields:
|
||||
mapping["mappings"]["properties"].update({field: {"type": "text", "analyzer": "synonym"}})
|
||||
mapping["mappings"]["properties"][self.content_field] = {"type": "text", "analyzer": "synonym"}
|
||||
|
||||
mapping["settings"]["analysis"]["analyzer"]["synonym"] = {"tokenizer": "whitespace",
|
||||
"filter": ["lowercase",
|
||||
"synonym"]}
|
||||
mapping["settings"]["analysis"]["filter"] = {"synonym": {"type": self.synonym_type, "synonyms": self.synonyms}}
|
||||
|
||||
else:
|
||||
for field in self.search_fields:
|
||||
mapping["mappings"]["properties"].update({field: {"type": "text"}})
|
||||
|
||||
if self.embedding_field:
|
||||
mapping["mappings"]["properties"][self.embedding_field] = {"type": "dense_vector", "dims": self.embedding_dim}
|
||||
|
||||
@ -1353,7 +1361,7 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
|
||||
"mappings": {
|
||||
"properties": {
|
||||
self.name_field: {"type": "keyword"},
|
||||
self.content_field: {"type": "text"},
|
||||
self.content_field: {"type": "text"}
|
||||
},
|
||||
"dynamic_templates": [
|
||||
{
|
||||
@ -1373,6 +1381,21 @@ class OpenSearchDocumentStore(ElasticsearchDocumentStore):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.synonyms:
|
||||
for field in self.search_fields:
|
||||
mapping["mappings"]["properties"].update({field: {"type": "text", "analyzer": "synonym"}})
|
||||
mapping["mappings"]["properties"][self.content_field] = {"type": "text", "analyzer": "synonym"}
|
||||
|
||||
mapping["settings"]["analysis"]["analyzer"]["synonym"] = {"tokenizer": "whitespace",
|
||||
"filter": ["lowercase",
|
||||
"synonym"]}
|
||||
mapping["settings"]["analysis"]["filter"] = {"synonym": {"type": self.synonym_type, "synonyms": self.synonyms}}
|
||||
|
||||
else:
|
||||
for field in self.search_fields:
|
||||
mapping["mappings"]["properties"].update({field: {"type": "text"}})
|
||||
|
||||
if self.embedding_field:
|
||||
|
||||
if self.similarity == "cosine":
|
||||
|
||||
@ -1196,4 +1196,30 @@ def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_sto
|
||||
responses.add_passthru(DC_API_ENDPOINT)
|
||||
|
||||
emb_docs = deepset_cloud_document_store.query_by_embedding(query_emb)
|
||||
assert len(emb_docs) == 0
|
||||
assert len(emb_docs) == 0
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
def test_elasticsearch_search_field_mapping():
    """Verify that configured search fields are mapped as ``text``.

    The test drops any existing ``haystack_search_field_mapping`` index,
    creates an ``ElasticsearchDocumentStore`` with ``title`` as the content
    field and ``content`` / ``sub_content`` as search fields, writes four
    sample documents, and then asserts that the mapping reported by
    Elasticsearch lists both search fields with type ``text``.
    """
    index_name = "haystack_search_field_mapping"

    es_client = Elasticsearch()
    # Start from a clean index; a 404 response is ignored by the client.
    es_client.indices.delete(index=index_name, ignore=[404])

    sample_docs = [
        {
            "title": "Green tea components",
            "meta": {
                "content": "The green tea plant contains a range of healthy compounds that make it into the final drink",
                "sub_content": "Drink tip",
            },
            "id": "1",
        },
        {
            "title": "Green tea catechin",
            "meta": {
                "content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).",
                "sub_content": "Ingredients tip",
            },
            "id": "2",
        },
        {
            "title": "Minerals in Green tea",
            "meta": {
                "content": "Green tea also has small amounts of minerals that can benefit your health.",
                "sub_content": "Minerals tip",
            },
            "id": "3",
        },
        {
            "title": "Green tea Benefits",
            "meta": {
                "content": "Green tea does more than just keep you alert, it may also help boost brain function.",
                "sub_content": "Health tip",
            },
            "id": "4",
        },
    ]

    store = ElasticsearchDocumentStore(
        index=index_name,
        search_fields=["content", "sub_content"],
        content_field="title",
    )
    store.write_documents(sample_docs)

    # Read the mapping back from the cluster rather than from the store object.
    reported_mapping = es_client.indices.get_mapping(index=index_name)
    field_properties = reported_mapping[index_name]["mappings"]["properties"]

    assert field_properties["content"]["type"] == 'text'
    assert field_properties["sub_content"]["type"] == 'text'
|
||||
Loading…
x
Reference in New Issue
Block a user