From 3e9288be9ac7e05a059ed9288b361c0a548f564d Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Thu, 4 May 2023 00:16:06 -0700 Subject: [PATCH] Add Description Ngram for search (#11395) * Add Description Ngram for search * Fix glossary aggregation --- .../resources/search/SearchResource.java | 12 ++++++++-- .../en/container_index_mapping.json | 24 ++++++++++++++++--- .../en/dashboard_index_mapping.json | 16 +++++++++++-- .../en/glossary_index_mapping.json | 9 ++++++- .../en/mlmodel_index_mapping.json | 8 ++++++- .../en/pipeline_index_mapping.json | 8 ++++++- .../elasticsearch/en/query_index_mapping.json | 8 +++++-- .../elasticsearch/en/table_index_mapping.json | 8 +++++-- .../elasticsearch/en/tag_index_mapping.json | 8 ++++++- .../elasticsearch/en/topic_index_mapping.json | 8 ++++++- 10 files changed, 93 insertions(+), 16 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java index 4085b7914ef..1c66d144b2b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java @@ -105,6 +105,7 @@ public class SearchResource { private static final String FIELD_NAME_NGRAM = "name.ngram"; private static final String DISPLAY_NAME_KEYWORD = "displayName.keyword"; private static final String FIELD_DISPLAY_NAME_NGRAM = "displayName.ngram"; + private static final String FIELD_DESCRIPTION_NGRAM = "description.ngram"; private static final String QUERY = "query"; private static final String QUERY_NGRAM = "query.ngram"; private static final String DESCRIPTION = "description"; @@ -556,6 +557,7 @@ public class SearchResource { .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field(FIELD_DESCRIPTION, 1.0f) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) 
.field("columns.name.keyword", 10.0f) .field("columns.name", 2.0f) .field("columns.name.ngram") @@ -608,6 +610,7 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_NAME, 15.0f) .field(FIELD_NAME_NGRAM) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field(FIELD_DESCRIPTION, 1.0f) @@ -638,6 +641,7 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_NAME, 15.0f) .field(FIELD_NAME_NGRAM) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field(FIELD_DESCRIPTION, 1.0f) @@ -670,6 +674,7 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME, 15.0f) .field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_NAME, 15.0f) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field(DESCRIPTION, 1.0f) @@ -700,6 +705,7 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME, 15.0f) .field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_NAME, 15.0f) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field(DESCRIPTION, 1.0f) @@ -731,6 +737,7 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_NAME, 15.0f) .field(FIELD_DESCRIPTION, 1.0f) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f) .field("dataModel.columns.name", 2.0f) @@ -772,7 +779,8 @@ public class SearchResource { .field(FIELD_DISPLAY_NAME_NGRAM) .field(QUERY, 10.0f) .field(QUERY_NGRAM) - .field(DESCRIPTION, 3.0f) + .field(DESCRIPTION, 1.0f) + .field(FIELD_DESCRIPTION_NGRAM, 1.0f) .defaultOperator(Operator.AND) .fuzziness(Fuzziness.AUTO); @@ -879,7 +887,7 @@ public class SearchResource { new SearchSourceBuilder().query(queryBuilder).highlighter(hb).from(from).size(size); searchSourceBuilder 
.aggregation(AggregationBuilders.terms("tags.tagFQN").field("tags.tagFQN").size(MAX_AGGREGATE_SIZE)) - .aggregation(AggregationBuilders.terms("glossary.name").field("glossary.name.keyword")); + .aggregation(AggregationBuilders.terms("glossary.name.keyword").field("glossary.name.keyword")); return searchSourceBuilder; } diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/container_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/container_index_mapping.json index edbae0c5286..2e971fb41cd 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/container_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/container_index_mapping.json @@ -67,7 +67,12 @@ "type": "text", "index_options": "docs", "analyzer": "om_analyzer", - "norms": false + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" @@ -147,7 +152,12 @@ "type": "text", "index_options": "docs", "analyzer": "om_analyzer", - "norms": false + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "fullyQualifiedName": { "type": "text" @@ -205,7 +215,15 @@ "type": "text" }, "description": { - "type": "text" + "type": "text", + "index_options": "docs", + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "deleted": { "type": "text" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/dashboard_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/dashboard_index_mapping.json index 03f7f07ef24..c92215c4ed3 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/dashboard_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/dashboard_index_mapping.json @@ -74,7 +74,13 @@ }, "description": { "type": "text", - "analyzer": "om_analyzer" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + 
"analyzer": "om_ngram" + } + } }, "version": { "type": "float" @@ -129,7 +135,13 @@ }, "description": { "type": "text", - "analyzer": "om_analyzer" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "deleted": { "type": "text" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/glossary_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/glossary_index_mapping.json index a86882f0f48..3b4d3a7ecdb 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/glossary_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/glossary_index_mapping.json @@ -73,7 +73,14 @@ } }, "description": { - "type": "text" + "type": "text", + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json index a4312d6374f..0241bab83e2 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json @@ -74,7 +74,13 @@ }, "description": { "type": "text", - "analyzer": "om_analyzer" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/pipeline_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/pipeline_index_mapping.json index 47dc26a00c8..87193a7ac50 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/pipeline_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/pipeline_index_mapping.json @@ -69,7 +69,13 @@ }, "description": { "type": "text", - 
"analyzer": "om_analyzer" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/query_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/query_index_mapping.json index b8220c475eb..90c4eac3c0d 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/query_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/query_index_mapping.json @@ -65,9 +65,13 @@ }, "description": { "type": "text", - "index_options": "docs", "analyzer": "om_analyzer", - "norms": false + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/table_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/table_index_mapping.json index 4db35d3c177..8a779c8fb06 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/table_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/table_index_mapping.json @@ -74,9 +74,13 @@ }, "description": { "type": "text", - "index_options": "docs", "analyzer": "om_analyzer", - "norms": false + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/tag_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/tag_index_mapping.json index b5668efeed8..9ff5da6a4c8 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/tag_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/tag_index_mapping.json @@ -73,7 +73,13 @@ } }, "description": { - "type": "text" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": 
"float" diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/topic_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/topic_index_mapping.json index cbdc7b82adb..0ad2851a033 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/topic_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/topic_index_mapping.json @@ -70,7 +70,13 @@ }, "description": { "type": "text", - "analyzer": "om_analyzer" + "analyzer": "om_analyzer", + "fields": { + "ngram": { + "type": "text", + "analyzer": "om_ngram" + } + } }, "version": { "type": "float"