diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 32d73845e1..d0f669f05f 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -521,6 +521,11 @@ enum FilterOperator { """ EQUAL + """ + Represent the relation: field = value (case-insensitive), e.g. platform = HDFS + """ + IEQUAL + """ * Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"] """ @@ -575,6 +580,7 @@ enum FilterOperator { Represent the relation: URN field matches any nested child or parent in addition to the given URN """ RELATED_INCL + } """ diff --git a/docs/api/restli/restli-overview.md b/docs/api/restli/restli-overview.md index d8a8107526..22b913d9a2 100644 --- a/docs/api/restli/restli-overview.md +++ b/docs/api/restli/restli-overview.md @@ -1203,6 +1203,7 @@ where valid conditions include - CONTAIN - END_WITH - EQUAL + - IEQUAL (Supports case insensitive equals) - GREATER_THAN - GREATER_THAN_OR_EQUAL_TO - LESS_THAN diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index ace7fa2bc1..9f48727aec 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -552,6 +552,10 @@ public class ESUtils { return orQueryBuilder; } + private static boolean isCaseInsensitiveSearchEnabled(Condition condition) { + return condition == Condition.IEQUAL; + } + @Nonnull private static QueryBuilder getQueryBuilderFromCriterionForSingleField( @Nonnull Criterion criterion, @@ -564,6 +568,8 @@ public class ESUtils { final AspectRetriever aspectRetriever = opContext.getAspectRetriever(); final String fieldName = toParentField(criterion.getField(), aspectRetriever); + boolean enableCaseInsensitiveSearch; 
+ if (condition == Condition.IS_NULL) { return QueryBuilders.boolQuery() .mustNot(QueryBuilders.existsQuery(fieldName)) @@ -573,9 +579,15 @@ public class ESUtils { .must(QueryBuilders.existsQuery(fieldName)) .queryName(queryName != null ? queryName : fieldName); } else if (criterion.hasValues()) { - if (condition == Condition.EQUAL) { + if (condition == Condition.EQUAL || condition == Condition.IEQUAL) { + enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition); return buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch) .queryName(queryName != null ? queryName : fieldName); } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { return buildRangeQueryFromCriterion( @@ -596,7 +608,7 @@ public class ESUtils { return buildEndsWithConditionFromCriterion( fieldName, criterion, queryName, isTimeseries, aspectRetriever); } else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) { - + enableCaseInsensitiveSearch = isCaseInsensitiveSearchEnabled(condition); return QueryFilterRewriterContext.builder() .queryFilterRewriteChain(queryFilterRewriteChain) .condition(condition) @@ -605,7 +617,12 @@ public class ESUtils { .rewrite( opContext, buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)) + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch)) .queryName(queryName != null ? 
queryName : fieldName); } } @@ -670,9 +687,15 @@ public class ESUtils { @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull AspectRetriever aspectRetriever, + boolean enableCaseInsensitiveSearch) { return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); + fieldName, + criterion, + isTimeseries, + searchableFieldTypes, + aspectRetriever, + enableCaseInsensitiveSearch); } /** @@ -684,7 +707,8 @@ public class ESUtils { @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull AspectRetriever aspectRetriever, + boolean enableCaseInsensitiveSearch) { Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); if (fieldTypes.size() > 1) { log.warn( @@ -704,6 +728,21 @@ public class ESUtils { criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList()); return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } + + if (enableCaseInsensitiveSearch) { + BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); + criterion + .getValues() + .forEach( + value -> + boolQuery.should( + QueryBuilders.termQuery( + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + value.trim()) + .caseInsensitive(true))); + return boolQuery; + } + return QueryBuilders.termsQuery( toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), criterion.getValues()) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 03d104b9e7..928818f8c1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ 
-101,6 +101,7 @@ public class ESUtilsTest { + " \"_name\" : \"myTestField\"\n" + " }\n" + "}"; + Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = @@ -150,6 +151,85 @@ public class ESUtilsTest { Assert.assertEquals(result.toString(), expected); } + @Test + public void testGetQueryBuilderFromCriterionIEqualValues() { // Test case insensitive searches + + final Criterion singleValueCriterion = + buildCriterion("myTestField", Condition.IEQUAL, "value1"); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); + + String expected = + "{\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value1\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + "}"; + + Assert.assertEquals(result.toString(), expected); + + final Criterion multiValueCriterion = + buildCriterion("myTestField", Condition.IEQUAL, "value1", "value2"); + + result = + ESUtils.getQueryBuilderFromCriterion( + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); + + expected = + "{\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value1\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"value\" : \"value2\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : 
\"myTestField\"\n" + + " }\n" + + "}"; + + Assert.assertEquals(result.toString(), expected); + } + @Test public void testGetQueryBuilderFromCriterionContain() { final Criterion singleValueCriterion = diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl index a79055ea3d..193e762854 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl @@ -20,6 +20,11 @@ enum Condition { */ EQUAL + /** + * Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs + */ + IEQUAL + /** * Represent the relation: field is null, e.g. platform is null */ diff --git a/metadata-service/README.md b/metadata-service/README.md index 8aec1ecc3a..0c7085b10d 100644 --- a/metadata-service/README.md +++ b/metadata-service/README.md @@ -1291,6 +1291,7 @@ where valid conditions include - CONTAIN - END_WITH - EQUAL + - IEQUAL (support case insensitive values) - GREATER_THAN - GREATER_THAN_OR_EQUAL_TO - LESS_THAN diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json index e8cc193f34..061feafac1 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json @@ -56,13 +56,14 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", 
"EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. 
ownerCount >= 5", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index bc4d222e31..fa58edb41c 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -162,13 +162,14 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. 
ownerCount >= 5", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 8ff0aa9307..086f21cfe7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6057,13 +6057,14 @@ "name" : "Condition", "namespace" : "com.linkedin.metadata.query.filter", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IEQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", "DESCENDANTS_INCL" : "Represent the relation: URN field any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", + "IEQUAL" : "Represent the relation: field = value and support case insensitive values, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", "GREATER_THAN" : "Represent the relation greater than, e.g. ownerCount > 5", "GREATER_THAN_OR_EQUAL_TO" : "Represent the relation greater than or equal to, e.g. ownerCount >= 5",