Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of case (#10611)

* Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of case

* Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of case
This commit is contained in:
Sriharsha Chintalapani 2023-04-10 22:31:43 -07:00 committed by GitHub
parent 4084d3b0d5
commit a49b2b501a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 117 additions and 11 deletions

View File

@ -102,6 +102,7 @@ public class SearchResource {
private static final Integer MAX_RESULT_HITS = 10000;
private static final String NAME_KEYWORD = "name.keyword";
private static final String DISPLAY_NAME = "displayName";
private static final String FIELD_NAME_NGRAM = "name.ngram";
private static final String DISPLAY_NAME_KEYWORD = "displayName.keyword";
private static final String FIELD_DISPLAY_NAME_NGRAM = "displayName.ngram";
private static final String QUERY = "query";
@ -551,6 +552,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f)
@ -605,6 +607,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f)
@ -634,6 +637,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f)
@ -839,6 +843,8 @@ public class SearchResource {
private SearchSourceBuilder buildGlossaryTermSearchBuilder(String query, int from, int size) {
QueryStringQueryBuilder queryBuilder =
QueryBuilders.queryStringQuery(query)
.field(FIELD_DISPLAY_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME_NGRAM, 1.0f)
.field(FIELD_NAME, 10.0f)
.field(NAME_KEYWORD, 10.0f)
.field(DISPLAY_NAME_KEYWORD, 10.0f)
@ -855,6 +861,8 @@ public class SearchResource {
HighlightBuilder.Field highlightGlossaryName = new HighlightBuilder.Field(FIELD_NAME);
highlightGlossaryName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightGlossaryDisplayName = new HighlightBuilder.Field(FIELD_DISPLAY_NAME);
highlightGlossaryDisplayName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION);
highlightDescription.highlighterType(UNIFIED);
HighlightBuilder.Field highlightSynonym = new HighlightBuilder.Field("synonyms");
@ -862,7 +870,9 @@ public class SearchResource {
HighlightBuilder hb = new HighlightBuilder();
hb.field(highlightDescription);
hb.field(highlightGlossaryName);
hb.field(highlightGlossaryDisplayName);
hb.field(highlightSynonym);
hb.preTags("<span class=\"text-highlighter\">");
hb.postTags("</span>");
SearchSourceBuilder searchSourceBuilder =
@ -877,15 +887,20 @@ public class SearchResource {
QueryStringQueryBuilder queryBuilder =
QueryBuilders.queryStringQuery(query)
.field(FIELD_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME_NGRAM, 1.0f)
.field(DESCRIPTION, 3.0f)
.defaultOperator(Operator.AND)
.fuzziness(Fuzziness.AUTO);
HighlightBuilder.Field highlightTagName = new HighlightBuilder.Field(FIELD_NAME);
highlightTagName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightTagDisplayName = new HighlightBuilder.Field(FIELD_DISPLAY_NAME);
highlightTagDisplayName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION);
highlightDescription.highlighterType(UNIFIED);
HighlightBuilder hb = new HighlightBuilder();
hb.field(highlightTagDisplayName);
hb.field(highlightDescription);
hb.field(highlightTagName);
hb.preTags("<span class=\"text-highlighter\">");

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -39,10 +42,15 @@
},
"name": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},
@ -57,6 +65,10 @@
"ngram": {
"type": "text",
"analyzer": "om_ngram"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -38,11 +41,16 @@
"type": "text"
},
"name": {
"type": "keyword",
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -39,10 +42,15 @@
},
"name": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},
@ -54,6 +62,10 @@
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"

View File

@ -17,6 +17,14 @@
"lowercase",
"om_stemmer"
]
},
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -47,7 +55,17 @@
},
"displayName": {
"type": "text",
"analyzer": "om_analyzer"
"analyzer": "om_analyzer",
"fields": {
"ngram": {
"type": "text",
"analyzer": "om_ngram"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"description": {
"type": "text",

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -39,10 +42,15 @@
},
"name": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},
@ -54,6 +62,10 @@
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -38,11 +41,16 @@
"type": "text"
},
"name": {
"type": "keyword",
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},
@ -52,10 +60,15 @@
},
"displayName": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},

View File

@ -1,6 +1,6 @@
{
"settings": {
"analysis": {
"analysis": {
"normalizer": {
"lowercase_normalizer": {
"type": "custom",
@ -39,10 +39,15 @@
},
"name": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {
@ -39,10 +42,15 @@
},
"name": {
"type": "text",
"analyzer": "om_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
}
}
},

View File

@ -21,7 +21,10 @@
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2
"max_gram": 2,
"filter": [
"lowercase"
]
}
},
"filter": {