Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of letter case (#10611)

* Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of letter case

* Fix Search Indexes: Normalize display name to lowercase so that searches match regardless of letter case
This commit is contained in:
Sriharsha Chintalapani 2023-04-10 22:31:43 -07:00 committed by GitHub
parent 4084d3b0d5
commit a49b2b501a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 117 additions and 11 deletions

View File

@ -102,6 +102,7 @@ public class SearchResource {
private static final Integer MAX_RESULT_HITS = 10000; private static final Integer MAX_RESULT_HITS = 10000;
private static final String NAME_KEYWORD = "name.keyword"; private static final String NAME_KEYWORD = "name.keyword";
private static final String DISPLAY_NAME = "displayName"; private static final String DISPLAY_NAME = "displayName";
private static final String FIELD_NAME_NGRAM = "name.ngram";
private static final String DISPLAY_NAME_KEYWORD = "displayName.keyword"; private static final String DISPLAY_NAME_KEYWORD = "displayName.keyword";
private static final String FIELD_DISPLAY_NAME_NGRAM = "displayName.ngram"; private static final String FIELD_DISPLAY_NAME_NGRAM = "displayName.ngram";
private static final String QUERY = "query"; private static final String QUERY = "query";
@ -551,6 +552,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f) .field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f) .field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f) .field(FIELD_DESCRIPTION, 1.0f)
@ -605,6 +607,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f) .field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f) .field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f) .field(FIELD_DESCRIPTION, 1.0f)
@ -634,6 +637,7 @@ public class SearchResource {
.field(FIELD_DISPLAY_NAME, 15.0f) .field(FIELD_DISPLAY_NAME, 15.0f)
.field(FIELD_DISPLAY_NAME_NGRAM) .field(FIELD_DISPLAY_NAME_NGRAM)
.field(FIELD_NAME, 15.0f) .field(FIELD_NAME, 15.0f)
.field(FIELD_NAME_NGRAM)
.field(DISPLAY_NAME_KEYWORD, 25.0f) .field(DISPLAY_NAME_KEYWORD, 25.0f)
.field(NAME_KEYWORD, 25.0f) .field(NAME_KEYWORD, 25.0f)
.field(FIELD_DESCRIPTION, 1.0f) .field(FIELD_DESCRIPTION, 1.0f)
@ -839,6 +843,8 @@ public class SearchResource {
private SearchSourceBuilder buildGlossaryTermSearchBuilder(String query, int from, int size) { private SearchSourceBuilder buildGlossaryTermSearchBuilder(String query, int from, int size) {
QueryStringQueryBuilder queryBuilder = QueryStringQueryBuilder queryBuilder =
QueryBuilders.queryStringQuery(query) QueryBuilders.queryStringQuery(query)
.field(FIELD_DISPLAY_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME_NGRAM, 1.0f)
.field(FIELD_NAME, 10.0f) .field(FIELD_NAME, 10.0f)
.field(NAME_KEYWORD, 10.0f) .field(NAME_KEYWORD, 10.0f)
.field(DISPLAY_NAME_KEYWORD, 10.0f) .field(DISPLAY_NAME_KEYWORD, 10.0f)
@ -855,6 +861,8 @@ public class SearchResource {
HighlightBuilder.Field highlightGlossaryName = new HighlightBuilder.Field(FIELD_NAME); HighlightBuilder.Field highlightGlossaryName = new HighlightBuilder.Field(FIELD_NAME);
highlightGlossaryName.highlighterType(UNIFIED); highlightGlossaryName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightGlossaryDisplayName = new HighlightBuilder.Field(FIELD_DISPLAY_NAME);
highlightGlossaryDisplayName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION); HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION);
highlightDescription.highlighterType(UNIFIED); highlightDescription.highlighterType(UNIFIED);
HighlightBuilder.Field highlightSynonym = new HighlightBuilder.Field("synonyms"); HighlightBuilder.Field highlightSynonym = new HighlightBuilder.Field("synonyms");
@ -862,7 +870,9 @@ public class SearchResource {
HighlightBuilder hb = new HighlightBuilder(); HighlightBuilder hb = new HighlightBuilder();
hb.field(highlightDescription); hb.field(highlightDescription);
hb.field(highlightGlossaryName); hb.field(highlightGlossaryName);
hb.field(highlightGlossaryDisplayName);
hb.field(highlightSynonym); hb.field(highlightSynonym);
hb.preTags("<span class=\"text-highlighter\">"); hb.preTags("<span class=\"text-highlighter\">");
hb.postTags("</span>"); hb.postTags("</span>");
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder searchSourceBuilder =
@ -877,15 +887,20 @@ public class SearchResource {
QueryStringQueryBuilder queryBuilder = QueryStringQueryBuilder queryBuilder =
QueryBuilders.queryStringQuery(query) QueryBuilders.queryStringQuery(query)
.field(FIELD_NAME, 10.0f) .field(FIELD_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME, 10.0f)
.field(FIELD_DISPLAY_NAME_NGRAM, 1.0f)
.field(DESCRIPTION, 3.0f) .field(DESCRIPTION, 3.0f)
.defaultOperator(Operator.AND) .defaultOperator(Operator.AND)
.fuzziness(Fuzziness.AUTO); .fuzziness(Fuzziness.AUTO);
HighlightBuilder.Field highlightTagName = new HighlightBuilder.Field(FIELD_NAME); HighlightBuilder.Field highlightTagName = new HighlightBuilder.Field(FIELD_NAME);
highlightTagName.highlighterType(UNIFIED); highlightTagName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightTagDisplayName = new HighlightBuilder.Field(FIELD_DISPLAY_NAME);
highlightTagDisplayName.highlighterType(UNIFIED);
HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION); HighlightBuilder.Field highlightDescription = new HighlightBuilder.Field(FIELD_DESCRIPTION);
highlightDescription.highlighterType(UNIFIED); highlightDescription.highlighterType(UNIFIED);
HighlightBuilder hb = new HighlightBuilder(); HighlightBuilder hb = new HighlightBuilder();
hb.field(highlightTagDisplayName);
hb.field(highlightDescription); hb.field(highlightDescription);
hb.field(highlightTagName); hb.field(highlightTagName);
hb.preTags("<span class=\"text-highlighter\">"); hb.preTags("<span class=\"text-highlighter\">");

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -39,10 +42,15 @@
}, },
"name": { "name": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },
@ -57,6 +65,10 @@
"ngram": { "ngram": {
"type": "text", "type": "text",
"analyzer": "om_ngram" "analyzer": "om_ngram"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
} }
} }
}, },

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -38,11 +41,16 @@
"type": "text" "type": "text"
}, },
"name": { "name": {
"type": "keyword", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -39,10 +42,15 @@
}, },
"name": { "name": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },
@ -54,6 +62,10 @@
"type": "text", "type": "text",
"analyzer": "om_analyzer", "analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": { "ngram": {
"type": "text", "type": "text",
"analyzer": "om_ngram" "analyzer": "om_ngram"

View File

@ -17,6 +17,14 @@
"lowercase", "lowercase",
"om_stemmer" "om_stemmer"
] ]
},
"om_ngram": {
"tokenizer": "ngram",
"min_gram": 1,
"max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -47,7 +55,17 @@
}, },
"displayName": { "displayName": {
"type": "text", "type": "text",
"analyzer": "om_analyzer" "analyzer": "om_analyzer",
"fields": {
"ngram": {
"type": "text",
"analyzer": "om_ngram"
},
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"description": { "description": {
"type": "text", "type": "text",

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -39,10 +42,15 @@
}, },
"name": { "name": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },
@ -54,6 +62,10 @@
"type": "text", "type": "text",
"analyzer": "om_analyzer", "analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"ngram": { "ngram": {
"type": "text", "type": "text",
"analyzer": "om_ngram" "analyzer": "om_ngram"

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -38,11 +41,16 @@
"type": "text" "type": "text"
}, },
"name": { "name": {
"type": "keyword", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },
@ -52,10 +60,15 @@
}, },
"displayName": { "displayName": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },

View File

@ -1,6 +1,6 @@
{ {
"settings": { "settings": {
"analysis": { "analysis": {
"normalizer": { "normalizer": {
"lowercase_normalizer": { "lowercase_normalizer": {
"type": "custom", "type": "custom",
@ -39,10 +39,15 @@
}, },
"name": { "name": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {
@ -39,10 +42,15 @@
}, },
"name": { "name": {
"type": "text", "type": "text",
"analyzer": "om_analyzer",
"fields": { "fields": {
"keyword": { "keyword": {
"type": "keyword", "type": "keyword",
"ignore_above": 256 "ignore_above": 256
},
"ngram": {
"type": "text",
"analyzer": "om_ngram"
} }
} }
}, },

View File

@ -21,7 +21,10 @@
"om_ngram": { "om_ngram": {
"tokenizer": "ngram", "tokenizer": "ngram",
"min_gram": 1, "min_gram": 1,
"max_gram": 2 "max_gram": 2,
"filter": [
"lowercase"
]
} }
}, },
"filter": { "filter": {