mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-14 20:26:53 +00:00
Added IEQUAL operator to support case insensitive searches (#11501)
Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
This commit is contained in:
parent
7c6d31ca01
commit
cc63f53c6a
@ -521,6 +521,11 @@ enum FilterOperator {
|
||||
"""
|
||||
EQUAL
|
||||
|
||||
"""
|
||||
Represent the relation: field = value (case-insensitive), e.g. platform = HDFS
|
||||
"""
|
||||
IEQUAL
|
||||
|
||||
"""
|
||||
* Represent the relation: String field is one of the array values, e.g. name in ["Profile", "Event"]
|
||||
"""
|
||||
@ -575,6 +580,7 @@ enum FilterOperator {
|
||||
Represent the relation: URN field matches any nested child or parent in addition to the given URN
|
||||
"""
|
||||
RELATED_INCL
|
||||
|
||||
}
|
||||
|
||||
"""
|
||||
|
@ -1203,6 +1203,7 @@ where valid conditions include
|
||||
- CONTAIN
|
||||
- END_WITH
|
||||
- EQUAL
|
||||
- IEQUAL (case-insensitive equality)
|
||||
- GREATER_THAN
|
||||
- GREATER_THAN_OR_EQUAL_TO
|
||||
- LESS_THAN
|
||||
|
@ -552,6 +552,10 @@ public class ESUtils {
|
||||
return orQueryBuilder;
|
||||
}
|
||||
|
||||
private static boolean isCaseInsensitiveSearchEnabled(Condition condition) {
|
||||
return condition == Condition.IEQUAL;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static QueryBuilder getQueryBuilderFromCriterionForSingleField(
|
||||
@Nonnull Criterion criterion,
|
||||
@ -564,6 +568,8 @@ public class ESUtils {
|
||||
final AspectRetriever aspectRetriever = opContext.getAspectRetriever();
|
||||
final String fieldName = toParentField(criterion.getField(), aspectRetriever);
|
||||
|
||||
boolean enableCaseInsensitiveSearch;
|
||||
|
||||
if (condition == Condition.IS_NULL) {
|
||||
return QueryBuilders.boolQuery()
|
||||
.mustNot(QueryBuilders.existsQuery(fieldName))
|
||||
@ -573,9 +579,15 @@ public class ESUtils {
|
||||
.must(QueryBuilders.existsQuery(fieldName))
|
||||
.queryName(queryName != null ? queryName : fieldName);
|
||||
} else if (criterion.hasValues()) {
|
||||
if (condition == Condition.EQUAL) {
|
||||
if (condition == Condition.EQUAL || condition == Condition.IEQUAL) {
|
||||
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
|
||||
return buildEqualsConditionFromCriterion(
|
||||
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)
|
||||
fieldName,
|
||||
criterion,
|
||||
isTimeseries,
|
||||
searchableFieldTypes,
|
||||
aspectRetriever,
|
||||
enableCaseInsensitiveSearch)
|
||||
.queryName(queryName != null ? queryName : fieldName);
|
||||
} else if (RANGE_QUERY_CONDITIONS.contains(condition)) {
|
||||
return buildRangeQueryFromCriterion(
|
||||
@ -596,7 +608,7 @@ public class ESUtils {
|
||||
return buildEndsWithConditionFromCriterion(
|
||||
fieldName, criterion, queryName, isTimeseries, aspectRetriever);
|
||||
} else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) {
|
||||
|
||||
enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition);
|
||||
return QueryFilterRewriterContext.builder()
|
||||
.queryFilterRewriteChain(queryFilterRewriteChain)
|
||||
.condition(condition)
|
||||
@ -605,7 +617,12 @@ public class ESUtils {
|
||||
.rewrite(
|
||||
opContext,
|
||||
buildEqualsConditionFromCriterion(
|
||||
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever))
|
||||
fieldName,
|
||||
criterion,
|
||||
isTimeseries,
|
||||
searchableFieldTypes,
|
||||
aspectRetriever,
|
||||
enableCaseInsensitiveSearch))
|
||||
.queryName(queryName != null ? queryName : fieldName);
|
||||
}
|
||||
}
|
||||
@ -670,9 +687,15 @@ public class ESUtils {
|
||||
@Nonnull final Criterion criterion,
|
||||
final boolean isTimeseries,
|
||||
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
|
||||
@Nonnull AspectRetriever aspectRetriever) {
|
||||
@Nonnull AspectRetriever aspectRetriever,
|
||||
boolean enableCaseInsensitiveSearch) {
|
||||
return buildEqualsConditionFromCriterionWithValues(
|
||||
fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever);
|
||||
fieldName,
|
||||
criterion,
|
||||
isTimeseries,
|
||||
searchableFieldTypes,
|
||||
aspectRetriever,
|
||||
enableCaseInsensitiveSearch);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -684,7 +707,8 @@ public class ESUtils {
|
||||
@Nonnull final Criterion criterion,
|
||||
final boolean isTimeseries,
|
||||
final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes,
|
||||
@Nonnull AspectRetriever aspectRetriever) {
|
||||
@Nonnull AspectRetriever aspectRetriever,
|
||||
boolean enableCaseInsensitiveSearch) {
|
||||
Set<String> fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever);
|
||||
if (fieldTypes.size() > 1) {
|
||||
log.warn(
|
||||
@ -704,6 +728,21 @@ public class ESUtils {
|
||||
criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList());
|
||||
return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName);
|
||||
}
|
||||
|
||||
if (enableCaseInsensitiveSearch) {
|
||||
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
|
||||
criterion
|
||||
.getValues()
|
||||
.forEach(
|
||||
value ->
|
||||
boolQuery.should(
|
||||
QueryBuilders.termQuery(
|
||||
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
|
||||
value.trim())
|
||||
.caseInsensitive(true)));
|
||||
return boolQuery;
|
||||
}
|
||||
|
||||
return QueryBuilders.termsQuery(
|
||||
toKeywordField(criterion.getField(), isTimeseries, aspectRetriever),
|
||||
criterion.getValues())
|
||||
|
@ -101,6 +101,7 @@ public class ESUtilsTest {
|
||||
+ " \"_name\" : \"myTestField\"\n"
|
||||
+ " }\n"
|
||||
+ "}";
|
||||
|
||||
Assert.assertEquals(result.toString(), expected);
|
||||
|
||||
final Criterion multiValueCriterion =
|
||||
@ -150,6 +151,85 @@ public class ESUtilsTest {
|
||||
Assert.assertEquals(result.toString(), expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetQueryBuilderFromCriterionIEqualValues() { // Test case insensitive searches
|
||||
|
||||
final Criterion singleValueCriterion =
|
||||
buildCriterion("myTestField", Condition.IEQUAL, "value1");
|
||||
|
||||
QueryBuilder result =
|
||||
ESUtils.getQueryBuilderFromCriterion(
|
||||
singleValueCriterion,
|
||||
false,
|
||||
new HashMap<>(),
|
||||
mock(OperationContext.class),
|
||||
QueryFilterRewriteChain.EMPTY);
|
||||
|
||||
String expected =
|
||||
"{\n"
|
||||
+ " \"bool\" : {\n"
|
||||
+ " \"should\" : [\n"
|
||||
+ " {\n"
|
||||
+ " \"term\" : {\n"
|
||||
+ " \"myTestField.keyword\" : {\n"
|
||||
+ " \"value\" : \"value1\",\n"
|
||||
+ " \"case_insensitive\" : true,\n"
|
||||
+ " \"boost\" : 1.0\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ " ],\n"
|
||||
+ " \"adjust_pure_negative\" : true,\n"
|
||||
+ " \"boost\" : 1.0,\n"
|
||||
+ " \"_name\" : \"myTestField\"\n"
|
||||
+ " }\n"
|
||||
+ "}";
|
||||
|
||||
Assert.assertEquals(result.toString(), expected);
|
||||
|
||||
final Criterion multiValueCriterion =
|
||||
buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2");
|
||||
|
||||
result =
|
||||
ESUtils.getQueryBuilderFromCriterion(
|
||||
multiValueCriterion,
|
||||
false,
|
||||
new HashMap<>(),
|
||||
mock(OperationContext.class),
|
||||
QueryFilterRewriteChain.EMPTY);
|
||||
|
||||
expected =
|
||||
"{\n"
|
||||
+ " \"bool\" : {\n"
|
||||
+ " \"should\" : [\n"
|
||||
+ " {\n"
|
||||
+ " \"term\" : {\n"
|
||||
+ " \"myTestField.keyword\" : {\n"
|
||||
+ " \"value\" : \"value1\",\n"
|
||||
+ " \"case_insensitive\" : true,\n"
|
||||
+ " \"boost\" : 1.0\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ " },\n"
|
||||
+ " {\n"
|
||||
+ " \"term\" : {\n"
|
||||
+ " \"myTestField.keyword\" : {\n"
|
||||
+ " \"value\" : \"value2\",\n"
|
||||
+ " \"case_insensitive\" : true,\n"
|
||||
+ " \"boost\" : 1.0\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ " }\n"
|
||||
+ " ],\n"
|
||||
+ " \"adjust_pure_negative\" : true,\n"
|
||||
+ " \"boost\" : 1.0,\n"
|
||||
+ " \"_name\" : \"myTestField\"\n"
|
||||
+ " }\n"
|
||||
+ "}";
|
||||
|
||||
Assert.assertEquals(result.toString(), expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetQueryBuilderFromCriterionContain() {
|
||||
final Criterion singleValueCriterion =
|
||||
|
@ -20,6 +20,11 @@ enum Condition {
|
||||
*/
|
||||
EQUAL
|
||||
|
||||
/**
|
||||
* Represent the relation: field = value, compared case-insensitively, e.g. platform = hdfs
|
||||
*/
|
||||
IEQUAL
|
||||
|
||||
/**
|
||||
* Represent the relation: field is null, e.g. platform is null
|
||||
*/
|
||||
|
@ -1291,6 +1291,7 @@ where valid conditions include
|
||||
- CONTAIN
|
||||
- END_WITH
|
||||
- EQUAL
|
||||
- IEQUAL (case-insensitive equality)
|
||||
- GREATER_THAN
|
||||
- GREATER_THAN_OR_EQUAL_TO
|
||||
- LESS_THAN
|
||||
|
@ -56,13 +56,14 @@
|
||||
"type" : "enum",
|
||||
"name" : "Condition",
|
||||
"doc" : "The matching condition in a filter criterion",
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbolDocs" : {
|
||||
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
|
||||
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
|
||||
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
|
||||
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
|
||||
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
|
||||
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
|
||||
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
|
||||
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
|
||||
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
|
||||
|
@ -162,13 +162,14 @@
|
||||
"type" : "enum",
|
||||
"name" : "Condition",
|
||||
"doc" : "The matching condition in a filter criterion",
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbolDocs" : {
|
||||
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
|
||||
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
|
||||
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
|
||||
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
|
||||
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
|
||||
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
|
||||
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
|
||||
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
|
||||
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
|
||||
|
@ -6057,13 +6057,14 @@
|
||||
"name" : "Condition",
|
||||
"namespace" : "com.linkedin.metadata.query.filter",
|
||||
"doc" : "The matching condition in a filter criterion",
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbols" : [ "CONTAIN", "END_WITH", "EQUAL","IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ],
|
||||
"symbolDocs" : {
|
||||
"ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN",
|
||||
"CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile",
|
||||
"DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN",
|
||||
"END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event",
|
||||
"EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs",
|
||||
"IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs",
|
||||
"EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)",
|
||||
"GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5",
|
||||
"GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",
|
||||
|
Loading…
x
Reference in New Issue
Block a user