Added IEQUAL operator to support case insensitive searches (#11501)

Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
This commit is contained in:
Nbagga14 2024-10-05 00:35:29 +05:30 committed by GitHub
parent 7c6d31ca01
commit cc63f53c6a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 145 additions and 10 deletions

View File

@ -521,6 +521,11 @@ enum FilterOperator {
"""
EQUAL
"""
Represent the relation: field = value (case-insensitive), e.g. platform = HDFS
"""
IEQUAL
"""
Represent the relation: String field is one of the array values, e.g. name in ["Profile", "Event"]
"""
@ -575,6 +580,7 @@ enum FilterOperator {
Represent the relation: URN field matches any nested child or parent in addition to the given URN
"""
RELATED_INCL
}
"""

View File

@ -1203,6 +1203,7 @@ where valid conditions include
- CONTAIN
- END_WITH
- EQUAL
- IEQUAL (case-insensitive equals)
- GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO
- LESS_THAN

View File

@ -552,6 +552,10 @@ public class ESUtils {
return orQueryBuilder;
}
/**
 * Tells whether a filter condition requests case-insensitive matching.
 *
 * @param condition the condition taken from the criterion being translated
 * @return {@code true} only when {@code condition} is {@code Condition.IEQUAL}
 */
private static boolean isCaseInsensitiveSearchEnabled(Condition condition) {
  return Condition.IEQUAL == condition;
}
@Nonnull
private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
@Nonnull Criterion criterion,
@ -564,6 +568,8 @@ public class ESUtils {
final AspectRetriever aspectRetriever = opContext.getAspectRetriever();
final String fieldName = toParentField(criterion.getField(), aspectRetriever);
boolean enableCaseInsensitiveSearch;
if (condition == Condition.IS_NULL) {
return QueryBuilders.boolQuery()
.mustNot(QueryBuilders.existsQuery(fieldName))
@ -573,9 +579,15 @@ public class ESUtils {
.must(QueryBuilders.existsQuery(fieldName))
.queryName(queryName != null ? queryName : fieldName);
} else if (criterion.hasValues()) {
if (condition == Condition.EQUAL) {
if (condition == Condition.EQUAL || condition == Condition.IEQUAL) {
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch)
.queryName(queryName != null ? queryName : fieldName);
} else if (RANGE_QUERY_CONDITIONS.contains(condition)) {
return buildRangeQueryFromCriterion(
@ -596,7 +608,7 @@ public class ESUtils {
return buildEndsWithConditionFromCriterion(
fieldName, criterion, queryName, isTimeseries, aspectRetriever);
} else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) {
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return QueryFilterRewriterContext.builder()
.queryFilterRewriteChain(queryFilterRewriteChain)
.condition(condition)
@ -605,7 +617,12 @@ public class ESUtils {
.rewrite(
opContext,
buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever))
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch))
.queryName(queryName != null ? queryName : fieldName);
}
}
@ -670,9 +687,15 @@ public class ESUtils {
@Nonnull final Criterion criterion,
final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) {
@Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
return buildEqualsConditionFromCriterionWithValues(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever);
fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch);
}
/**
@ -684,7 +707,8 @@ public class ESUtils {
@Nonnull final Criterion criterion,
final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) {
@Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
if (fieldTypes.size() > 1) {
log.warn(
@ -704,6 +728,21 @@ public class ESUtils {
criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList());
return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName);
}
if (enableCaseInsensitiveSearch) {
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
criterion
.getValues()
.forEach(
value ->
boolQuery.should(
QueryBuilders.termQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
value.trim())
.caseInsensitive(true)));
return boolQuery;
}
return QueryBuilders.termsQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
criterion.getValues())

View File

@ -101,6 +101,7 @@ public class ESUtilsTest {
+ " \"_name\" : \"myTestField\"\n"
+ " }\n"
+ "}";
Assert.assertEquals(result.toString(), expected);
final Criterion multiValueCriterion =
@ -150,6 +151,85 @@ public class ESUtilsTest {
Assert.assertEquals(result.toString(), expected);
}
/**
 * Checks the query produced for {@code Condition.IEQUAL} (case-insensitive equality): the
 * expected output is a bool query holding one case-insensitive term clause on the {@code
 * .keyword} field per criterion value, named after the criterion field.
 */
@Test
public void testGetQueryBuilderFromCriterionIEqualValues() {
  // Scenario 1: a single value yields a single "should" clause.
  final Criterion oneValueCriterion = buildCriterion("myTestField", Condition.IEQUAL, "value1");
  final QueryBuilder oneValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          oneValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);
  final String oneValueExpected =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";
  Assert.assertEquals(oneValueQuery.toString(), oneValueExpected);

  // Scenario 2: several values yield one "should" clause each, in input order.
  final Criterion twoValueCriterion =
      buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2");
  final QueryBuilder twoValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          twoValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);
  final String twoValueExpected =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " },\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value2\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";
  Assert.assertEquals(twoValueQuery.toString(), twoValueExpected);
}
@Test
public void testGetQueryBuilderFromCriterionContain() {
final Criterion singleValueCriterion =

View File

@ -20,6 +20,11 @@ enum Condition {
*/
EQUAL
/**
* Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs
*/
IEQUAL
/**
* Represent the relation: field is null, e.g. platform is null
*/

View File

@ -1291,6 +1291,7 @@ where valid conditions include
- CONTAIN
- END_WITH
- EQUAL
- IEQUAL (case-insensitive equals)
- GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO
- LESS_THAN

View File

@ -56,13 +56,14 @@
"type" : "enum",
"name" : "Condition",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",

View File

@ -162,13 +162,14 @@
"type" : "enum",
"name" : "Condition",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",

View File

@ -6057,13 +6057,14 @@
"name" : "Condition",
"namespace" : "com.linkedin.metadata.query.filter",
"doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",