Added IEQUAL operator to support case insensitive searches (#11501)

Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
This commit is contained in:
Nbagga14 2024-10-05 00:35:29 +05:30 committed by GitHub
parent 7c6d31ca01
commit cc63f53c6a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 145 additions and 10 deletions

View File

@ -521,6 +521,11 @@ enum FilterOperator {
""" """
EQUAL EQUAL
"""
Represent the relation: field = value (case-insensitive), e.g. platform = HDFS
"""
IEQUAL
""" """
* Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"] * Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"]
""" """
@ -575,6 +580,7 @@ enum FilterOperator {
Represent the relation: URN field matches any nested child or parent in addition to the given URN Represent the relation: URN field matches any nested child or parent in addition to the given URN
""" """
RELATED_INCL RELATED_INCL
} }
""" """

View File

@ -1203,6 +1203,7 @@ where valid conditions include
- CONTAIN - CONTAIN
- END_WITH - END_WITH
- EQUAL - EQUAL
- IEQUAL (case-insensitive variant of EQUAL)
- GREATER_THAN - GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO - GREATER_THAN_OR_EQUAL_TO
- LESS_THAN - LESS_THAN

View File

@ -552,6 +552,10 @@ public class ESUtils {
return orQueryBuilder; return orQueryBuilder;
} }
/**
 * Reports whether a filter condition requests case-insensitive matching.
 *
 * @param condition the matching condition taken from the criterion being translated
 * @return {@code true} only when {@code condition} is {@link Condition#IEQUAL}
 */
private static boolean isCaseInsensitiveSearchEnabled(Condition condition) {
  return Condition.IEQUAL == condition;
}
@Nonnull @Nonnull
private static QueryBuilder getQueryBuilderFromCriterionForSingleField( private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
@Nonnull Criterion criterion, @Nonnull Criterion criterion,
@ -564,6 +568,8 @@ public class ESUtils {
final AspectRetriever aspectRetriever = opContext.getAspectRetriever(); final AspectRetriever aspectRetriever = opContext.getAspectRetriever();
final String fieldName = toParentField(criterion.getField(), aspectRetriever); final String fieldName = toParentField(criterion.getField(), aspectRetriever);
boolean enableCaseInsensitiveSearch;
if (condition == Condition.IS_NULL) { if (condition == Condition.IS_NULL) {
return QueryBuilders.boolQuery() return QueryBuilders.boolQuery()
.mustNot(QueryBuilders.existsQuery(fieldName)) .mustNot(QueryBuilders.existsQuery(fieldName))
@ -573,9 +579,15 @@ public class ESUtils {
.must(QueryBuilders.existsQuery(fieldName)) .must(QueryBuilders.existsQuery(fieldName))
.queryName(queryName != null ? queryName : fieldName); .queryName(queryName != null ? queryName : fieldName);
} else if (criterion.hasValues()) { } else if (criterion.hasValues()) {
if (condition == Condition.EQUAL) { if (condition == Condition.EQUAL || condition == Condition.IEQUAL) {
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return buildEqualsConditionFromCriterion( return buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch)
.queryName(queryName != null ? queryName : fieldName); .queryName(queryName != null ? queryName : fieldName);
} else if (RANGE_QUERY_CONDITIONS.contains(condition)) { } else if (RANGE_QUERY_CONDITIONS.contains(condition)) {
return buildRangeQueryFromCriterion( return buildRangeQueryFromCriterion(
@ -596,7 +608,7 @@ public class ESUtils {
return buildEndsWithConditionFromCriterion( return buildEndsWithConditionFromCriterion(
fieldName, criterion, queryName, isTimeseries, aspectRetriever); fieldName, criterion, queryName, isTimeseries, aspectRetriever);
} else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) { } else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) {
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
return QueryFilterRewriterContext.builder() return QueryFilterRewriterContext.builder()
.queryFilterRewriteChain(queryFilterRewriteChain) .queryFilterRewriteChain(queryFilterRewriteChain)
.condition(condition) .condition(condition)
@ -605,7 +617,12 @@ public class ESUtils {
.rewrite( .rewrite(
opContext, opContext,
buildEqualsConditionFromCriterion( buildEqualsConditionFromCriterion(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)) fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch))
.queryName(queryName != null ? queryName : fieldName); .queryName(queryName != null ? queryName : fieldName);
} }
} }
@ -670,9 +687,15 @@ public class ESUtils {
@Nonnull final Criterion criterion, @Nonnull final Criterion criterion,
final boolean isTimeseries, final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes, final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) { @Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
return buildEqualsConditionFromCriterionWithValues( return buildEqualsConditionFromCriterionWithValues(
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); fieldName,
criterion,
isTimeseries,
searchableFieldTypes,
aspectRetriever,
enableCaseInsensitiveSearch);
} }
/** /**
@ -684,7 +707,8 @@ public class ESUtils {
@Nonnull final Criterion criterion, @Nonnull final Criterion criterion,
final boolean isTimeseries, final boolean isTimeseries,
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes, final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
@Nonnull AspectRetriever aspectRetriever) { @Nonnull AspectRetriever aspectRetriever,
boolean enableCaseInsensitiveSearch) {
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
if (fieldTypes.size() > 1) { if (fieldTypes.size() > 1) {
log.warn( log.warn(
@ -704,6 +728,21 @@ public class ESUtils {
criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList()); criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList());
return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName);
} }
if (enableCaseInsensitiveSearch) {
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
criterion
.getValues()
.forEach(
value ->
boolQuery.should(
QueryBuilders.termQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
value.trim())
.caseInsensitive(true)));
return boolQuery;
}
return QueryBuilders.termsQuery( return QueryBuilders.termsQuery(
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
criterion.getValues()) criterion.getValues())

View File

@ -101,6 +101,7 @@ public class ESUtilsTest {
+ " \"_name\" : \"myTestField\"\n" + " \"_name\" : \"myTestField\"\n"
+ " }\n" + " }\n"
+ "}"; + "}";
Assert.assertEquals(result.toString(), expected); Assert.assertEquals(result.toString(), expected);
final Criterion multiValueCriterion = final Criterion multiValueCriterion =
@ -150,6 +151,85 @@ public class ESUtilsTest {
Assert.assertEquals(result.toString(), expected); Assert.assertEquals(result.toString(), expected);
} }
/**
 * Verifies the query generated for {@code Condition.IEQUAL} (case-insensitive equality).
 *
 * <p>Both the single-value and the multi-value criterion are expected to render as a {@code bool}
 * query holding one {@code term} clause per value under {@code should}, each flagged with
 * {@code case_insensitive : true} and targeting the {@code myTestField.keyword} field.
 */
@Test
public void testGetQueryBuilderFromCriterionIEqualValues() {
  // Case 1: a single value yields exactly one case-insensitive term clause.
  final Criterion singleValueCriterion =
      buildCriterion("myTestField", Condition.IEQUAL, "value1");
  final QueryBuilder singleValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          singleValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);
  final String expectedSingleValueJson =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";
  Assert.assertEquals(singleValueQuery.toString(), expectedSingleValueJson);

  // Case 2: several values yield one case-insensitive term clause per value, in input order.
  final Criterion multiValueCriterion =
      buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2");
  final QueryBuilder multiValueQuery =
      ESUtils.getQueryBuilderFromCriterion(
          multiValueCriterion,
          false,
          new HashMap<>(),
          mock(OperationContext.class),
          QueryFilterRewriteChain.EMPTY);
  final String expectedMultiValueJson =
      "{\n"
          + " \"bool\" : {\n"
          + " \"should\" : [\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value1\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " },\n"
          + " {\n"
          + " \"term\" : {\n"
          + " \"myTestField.keyword\" : {\n"
          + " \"value\" : \"value2\",\n"
          + " \"case_insensitive\" : true,\n"
          + " \"boost\" : 1.0\n"
          + " }\n"
          + " }\n"
          + " }\n"
          + " ],\n"
          + " \"adjust_pure_negative\" : true,\n"
          + " \"boost\" : 1.0,\n"
          + " \"_name\" : \"myTestField\"\n"
          + " }\n"
          + "}";
  Assert.assertEquals(multiValueQuery.toString(), expectedMultiValueJson);
}
@Test @Test
public void testGetQueryBuilderFromCriterionContain() { public void testGetQueryBuilderFromCriterionContain() {
final Criterion singleValueCriterion = final Criterion singleValueCriterion =

View File

@ -20,6 +20,11 @@ enum Condition {
*/ */
EQUAL EQUAL
/**
* Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs
*/
IEQUAL
/** /**
* Represent the relation: field is null, e.g. platform is null * Represent the relation: field is null, e.g. platform is null
*/ */

View File

@ -1291,6 +1291,7 @@ where valid conditions include
- CONTAIN - CONTAIN
- END_WITH - END_WITH
- EQUAL - EQUAL
- IEQUAL (case-insensitive variant of EQUAL)
- GREATER_THAN - GREATER_THAN
- GREATER_THAN_OR_EQUAL_TO - GREATER_THAN_OR_EQUAL_TO
- LESS_THAN - LESS_THAN

View File

@ -56,13 +56,14 @@
"type" : "enum", "type" : "enum",
"name" : "Condition", "name" : "Condition",
"doc" : "The matching condition in a filter criterion", "doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : { "symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",

View File

@ -162,13 +162,14 @@
"type" : "enum", "type" : "enum",
"name" : "Condition", "name" : "Condition",
"doc" : "The matching condition in a filter criterion", "doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : { "symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",

View File

@ -6057,13 +6057,14 @@
"name" : "Condition", "name" : "Condition",
"namespace" : "com.linkedin.metadata.query.filter", "namespace" : "com.linkedin.metadata.query.filter",
"doc" : "The matching condition in a filter criterion", "doc" : "The matching condition in a filter criterion",
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
"symbolDocs" : { "symbolDocs" : {
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",