From ebb2af637f125c7482bfefde80aba4fc617fe31a Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 1 May 2023 13:18:19 -0500 Subject: [PATCH] feat(search): enable search initial customization (#7901) --- docs/how/search.md | 147 +++++++++++++++ .../config/search/CustomConfiguration.java | 47 +++++ .../config/search/SearchConfiguration.java | 1 + .../search/custom/BoolQueryConfiguration.java | 27 +++ .../custom/CustomSearchConfiguration.java | 23 +++ .../search/custom/QueryConfiguration.java | 103 ++++++++++ .../elasticsearch/query/ESSearchDAO.java | 25 ++- .../query/request/CustomizedQueryHandler.java | 43 +++++ .../query/request/SearchFieldConfig.java | 52 +++-- .../query/request/SearchQueryBuilder.java | 163 +++++++++------- .../query/request/SearchRequestHandler.java | 23 ++- .../metadata/ESSampleDataFixture.java | 10 +- .../metadata/ESSearchLineageFixture.java | 2 +- .../search/LineageSearchServiceTest.java | 2 +- .../metadata/search/SearchServiceTest.java | 2 +- .../ElasticSearchServiceTest.java | 2 +- .../fixtures/SampleDataFixtureTests.java | 25 +-- .../request/CustomizedQueryHandlerTest.java | 178 ++++++++++++++++++ .../query/request/SearchQueryBuilderTest.java | 99 +++++++++- .../request/SearchRequestHandlerTest.java | 8 +- .../sample_data/datasetindex_v2.json.gz | Bin 3674 -> 3688 bytes .../resources/search_config_builder_test.yml | 74 ++++++++ .../resources/search_config_fixture_test.yml | 51 +++++ .../src/test/resources/search_config_test.yml | 55 ++++++ .../com/linkedin/schema/SchemaField.pdl | 3 +- .../search/ElasticSearchServiceFactory.java | 20 +- .../src/main/resources/application.yml | 3 + .../src/main/resources/search_config.yml | 71 +++++++ .../gms/servlet/ConfigSearchExport.java | 2 +- 29 files changed, 1135 insertions(+), 126 deletions(-) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java create mode 100644 
metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java create mode 100644 metadata-io/src/test/resources/search_config_builder_test.yml create mode 100644 metadata-io/src/test/resources/search_config_fixture_test.yml create mode 100644 metadata-io/src/test/resources/search_config_test.yml create mode 100644 metadata-service/factories/src/main/resources/search_config.yml diff --git a/docs/how/search.md b/docs/how/search.md index ba1cdaf8b5..fd1403e67d 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -182,6 +182,153 @@ for integrations and programmatic use-cases. ### DataHub Blog * [Using DataHub for Search & Discovery](https://blog.datahubproject.io/using-datahub-for-search-discovery-fa309089be22) +## Customizing Search + +It is possible to completely customize search ranking, filtering, and queries using a search configuration yaml file. +This no-code solution provides the ability to extend, or replace, the Elasticsearch-based search functionality. The +only limitation is that the information used in the query/ranking/filtering must be present in the entities' document, +however this does include `customProperties`, `tags`, `terms`, `domain`, as well as many additional fields. + +Additionally, multiple customizations can be applied to different query strings. A regex is applied to the search query +to determine which customized search profile to use. 
This means a different query/ranking/filtering can be applied to +a `select all`/`*` query or one that contains an actual query. + +Search results (excluding select `*`) are a balance between relevancy and the scoring function. In +general, when trying to improve relevancy, focus on changing the query in the `boolQuery` section and rely on the +`functionScore` for surfacing the *importance* in the case of a relevancy tie. Consider the scenario +where a dataset named `orders` exists in multiple places. The relevancy between the dataset with the **name** `orders` and +the **term** `orders` is the same; however, one location may be more important, so the function score is used to rank it higher. + +**Note:** The customized query is a pass-through to Elasticsearch and must comply with its API; syntax errors are possible. +It is recommended to test the customized queries prior to production deployment, and knowledge of the Elasticsearch query +language is required. + +### Enable Custom Search + +The following environment variables on GMS control whether a search configuration is enabled and the location of the +configuration file. + +Enable Custom Search: +```shell +ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED=true +``` + +Custom Search File Location: +```shell +ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE=search_config.yml +``` +The location of the configuration file can be on the Java classpath or the local filesystem. A default configuration +file is included with the GMS jar with the name `search_config.yml`. + +### Search Configuration + +The search configuration yaml contains a simple list of configuration profiles selected using the `queryRegex`. If a +single profile is desired, a catch-all regex of `.*` can be used. + +The list of search configurations can be grouped into 4 general sections. + +1. `queryRegex` - Responsible for selecting the search customization based on the [regex matching](https://www.w3schools.com/java/java_regex.asp) the search query string.
+*The first match is applied.* +2. Built-in query booleans - There are 3 built-in queries which can be individually enabled/disabled. These include +the `simple query string`[[1]](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-simple-query-string-query.html), +`match phrase prefix`[[2]](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-match-query-phrase-prefix.html), and +`exact match`[[3]](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-term-query.html) queries, +enabled with the following booleans +respectively [`simpleQuery`, `prefixMatchQuery`, `exactMatchQuery`] +3. `boolQuery` - The base Elasticsearch `boolean query`[[4](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html)]. +If enabled in #2 above, those queries will +appear in the `should` section of the `boolean query`[[4](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html)]. +4. `functionScore` - The Elasticsearch `function score`[[5](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html#score-functions)] section of the overall query. + +### Examples + +These examples assume a match-all `queryRegex` of `.*` so that it would impact any search query for simplicity. + +#### Example 1: Ranking By Tags/Terms + +Boost entities with tags of `primary` or `gold` and an example glossary term's uuid. + +```yaml +queryConfigurations: + - queryRegex: .* + + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + + functionScore: + functions: + + - filter: + terms: + tags.keyword: + - urn:li:tag:primary + - urn:li:tag:gold + weight: 3.0 + + - filter: + terms: + glossaryTerms.keyword: + - urn:li:glossaryTerm:9afa9a59-93b2-47cb-9094-aa342eec24ad + weight: 3.0 + + score_mode: multiply + boost_mode: multiply +``` + +#### Example 2: Preferred Data Platform + +Boost the `urn:li:dataPlatform:hive` platform. 
+ +```yaml +queryConfigurations: + - queryRegex: .* + + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + + functionScore: + functions: + - filter: + terms: + platform.keyword: + - urn:li:dataPlatform:hive + weight: 3.0 + score_mode: multiply + boost_mode: multiply +``` + +#### Example 3: Exclusion & Bury + +This configuration extends the 3 built-in queries with a rule to exclude `deprecated` entities from search results +because they are not generally relevant, and reduces the score of `materialized` entities. + +```yaml +queryConfigurations: + - queryRegex: .* + + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + + boolQuery: + must_not: + term: + deprecated: + value: true + + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.5 + score_mode: multiply + boost_mode: multiply +``` + ## FAQ and Troubleshooting **How are the results ordered?** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java new file mode 100644 index 0000000000..73bdb34cc3 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java @@ -0,0 +1,47 @@ +package com.linkedin.metadata.config.search; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.FileSystemResource; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; + + +@Data +@AllArgsConstructor +@Slf4j +public class CustomConfiguration { + + private boolean configEnabled; + private String configFile; + + /** + * Materialize the search configuration from a location external to main
application.yml + * @param mapper YAML-enabled Jackson mapper used to parse the configuration file + * @return the parsed search configuration, or null when custom search configuration is disabled + * @throws IOException if the file is found on neither the classpath nor the filesystem, or cannot be read or parsed + */ + public CustomSearchConfiguration customSearchConfiguration(ObjectMapper mapper) throws IOException { + if (configEnabled) { + log.info("Custom search configuration enabled."); + try (InputStream stream = new ClassPathResource(configFile).getInputStream()) { + log.info("Custom search configuration found in classpath: {}", configFile); + return mapper.readValue(stream, CustomSearchConfiguration.class); + } catch (FileNotFoundException e) { /* classpath lookup reported file-not-found; retry the same name on the local filesystem */ + try (InputStream stream = new FileSystemResource(configFile).getInputStream()) { + log.info("Custom search configuration found in filesystem: {}", configFile); + return mapper.readValue(stream, CustomSearchConfiguration.class); + } + } + } else { + log.info("Custom search configuration disabled."); + return null; + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java index 0ac726bb27..1a56db1bd6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java @@ -9,5 +9,6 @@ public class SearchConfiguration { private int maxTermBucketSize; private ExactMatchConfiguration exactMatch; private PartialConfiguration partial; + private CustomConfiguration custom; private GraphQueryConfiguration graph; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java new file mode 100644 index 0000000000..460501cc91 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java @@ -0,0 +1,27 @@ +package
com.linkedin.metadata.config.search.custom; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; + + +@Builder(toBuilder = true) +@Getter +@ToString +@EqualsAndHashCode +@JsonDeserialize(builder = BoolQueryConfiguration.BoolQueryConfigurationBuilder.class) +public class BoolQueryConfiguration { + private Object must; + private Object should; + //CHECKSTYLE:OFF + private Object must_not; + //CHECKSTYLE:ON + private Object filter; + + @JsonPOJOBuilder(withPrefix = "") + public static class BoolQueryConfigurationBuilder { + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java new file mode 100644 index 0000000000..15deea7620 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java @@ -0,0 +1,23 @@ +package com.linkedin.metadata.config.search.custom; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +import java.util.List; + + +@Builder(toBuilder = true) +@Getter +@EqualsAndHashCode +@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class) +public class CustomSearchConfiguration { + + /** Customization profiles, one per queryRegex. */ private List<QueryConfiguration> queryConfigurations; + + @JsonPOJOBuilder(withPrefix = "") + public static class CustomSearchConfigurationBuilder { + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java new file mode 100644 index
0000000000..b894a24a09 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java @@ -0,0 +1,103 @@ +package com.linkedin.metadata.config.search.custom; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.elasticsearch.search.SearchModule; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + + +@Slf4j +@Builder(toBuilder = true) +@Getter +@ToString +@EqualsAndHashCode +@JsonDeserialize(builder = QueryConfiguration.QueryConfigurationBuilder.class) +public class QueryConfiguration { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + static { + OBJECT_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL); + } + private static final NamedXContentRegistry X_CONTENT_REGISTRY; + static { + SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); + X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); + } + + private String queryRegex; + @Builder.Default + private boolean simpleQuery = true; + @Builder.Default + private 
boolean exactMatchQuery = true; + @Builder.Default + private boolean prefixMatchQuery = true; + private BoolQueryConfiguration boolQuery; + /** Optional body of the Elasticsearch function_score query; null when the profile omits it. */ private Map<String, Object> functionScore; + + /** Wraps the given query in a function_score query built from this profile's functionScore section. */ public FunctionScoreQueryBuilder functionScoreQueryBuilder(QueryBuilder queryBuilder) { + return toFunctionScoreQueryBuilder(queryBuilder, functionScore); + } + + /** Returns the profile's boolQuery with the query-string placeholder substituted, or empty when no boolQuery is configured. */ public Optional<BoolQueryBuilder> boolQueryBuilder(String query) { + if (boolQuery != null) { + log.debug("Using custom query configuration queryRegex: {}", queryRegex); + } + return Optional.ofNullable(boolQuery).map(bq -> toBoolQueryBuilder(query, bq)); + } + + @JsonPOJOBuilder(withPrefix = "") + public static class QueryConfigurationBuilder { + } + + private static BoolQueryBuilder toBoolQueryBuilder(String query, BoolQueryConfiguration boolQuery) { + try { + /* the quoted placeholder is replaced by the JSON-encoded query so the fragment stays valid JSON */ String jsonFragment = OBJECT_MAPPER.writeValueAsString(boolQuery) + .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)); + XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, + LoggingDeprecationHandler.INSTANCE, jsonFragment); + return BoolQueryBuilder.fromXContent(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static FunctionScoreQueryBuilder toFunctionScoreQueryBuilder(QueryBuilder queryBuilder, + Map<String, Object> params) { + try { + /* functionScore is optional in the configuration; a missing section means no custom functions rather than an NPE */ HashMap<String, Object> body = params == null ? new HashMap<>() : new HashMap<>(params); + if (!body.isEmpty()) { + log.debug("Using custom scoring functions: {}", body); + } + + body.put("query", OBJECT_MAPPER.readValue(queryBuilder.toString(), Map.class)); + + String jsonFragment = OBJECT_MAPPER.writeValueAsString(Map.of( + "function_score", body + )); + XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, + LoggingDeprecationHandler.INSTANCE, jsonFragment); + return (FunctionScoreQueryBuilder) FunctionScoreQueryBuilder.parseInnerQueryBuilder(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 32873b6710..6c489171b6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -4,6 +4,7 @@ import com.codahale.metrics.Timer; import com.datahub.util.exception.ESQueryException; import com.fasterxml.jackson.core.type.TypeReference; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.AutoCompleteResult; @@ -53,6 +54,8 @@ public class ESSearchDAO { private final String elasticSearchImplementation; @Nonnull private final SearchConfiguration searchConfiguration; + @Nullable + private final CustomSearchConfiguration customSearchConfiguration; public long docCount(@Nonnull String entityName) { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); @@ -75,7 +78,9 @@ public class ESSearchDAO { log.debug("Executing request {}: {}", id, searchRequest); final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // extract results, validated against document model as well - return SearchRequestHandler.getBuilder(entitySpec, searchConfiguration).extractResult(searchResponse, filter, from, size); + return SearchRequestHandler + .getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + .extractResult(searchResponse, filter, from, size); } catch (Exception e) { log.error("Search query failed", e); throw new ESQueryException("Search query failed:", e); @@ -91,7 +96,9 @@ public class ESSearchDAO { try (Timer.Context ignored = 
MetricUtils.timer(this.getClass(), "executeAndExtract_scroll").time()) { final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // extract results, validated against document model as well - return SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration).extractScrollResult(searchResponse, + return SearchRequestHandler + .getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + .extractScrollResult(searchResponse, filter, scrollId, keepAlive, size, supportsPointInTime()); } catch (Exception e) { if (e instanceof ElasticsearchStatusException) { @@ -126,8 +133,9 @@ public class ESSearchDAO { Timer.Context searchRequestTimer = MetricUtils.timer(this.getClass(), "searchRequest").time(); EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); // Step 1: construct the query - final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec, searchConfiguration) - .getSearchRequest(finalInput, postFilters, sortCriterion, from, size, searchFlags); + final SearchRequest searchRequest = SearchRequestHandler + .getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + .getSearchRequest(finalInput, postFilters, sortCriterion, from, size, searchFlags); searchRequest.indices(indexConvention.getIndexName(entitySpec)); searchRequestTimer.stop(); // Step 2: execute the query and extract results, validated against document model as well @@ -148,7 +156,9 @@ public class ESSearchDAO { @Nullable SortCriterion sortCriterion, int from, int size) { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration).getFilterRequest(filters, sortCriterion, from, size); + SearchRequestHandler + .getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + .getFilterRequest(filters, sortCriterion, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); return 
executeAndExtract(entitySpec, searchRequest, filters, from, size); } @@ -252,8 +262,9 @@ public class ESSearchDAO { } // Step 1: construct the query - final SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration) - .getSearchRequest(finalInput, postFilters, sortCriterion, sort, pitId, keepAlive, size, searchFlags); + final SearchRequest searchRequest = SearchRequestHandler + .getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + .getSearchRequest(finalInput, postFilters, sortCriterion, sort, pitId, keepAlive, size, searchFlags); // PIT specifies indices in creation so it doesn't support specifying indices on the request, so we only specify if not using PIT if (!supportsPointInTime()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java new file mode 100644 index 0000000000..6186a90114 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.search.elasticsearch.query.request; + +import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +import javax.annotation.Nullable; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + + +@Slf4j +@Builder(builderMethodName = "hiddenBuilder") +@Getter +public class CustomizedQueryHandler { + private CustomSearchConfiguration customSearchConfiguration; + @Builder.Default + /* compiled queryRegex paired with its profile, in configuration order */ private List<Map.Entry<Pattern, QueryConfiguration>> queryConfigurations = List.of(); + + /** Returns the first profile whose queryRegex matches the entire query string, if any. */ public Optional<QueryConfiguration> lookupQueryConfig(String query) { + return
queryConfigurations.stream() + .filter(e -> e.getKey().matcher(query).matches()) + .map(Map.Entry::getValue) + .findFirst(); + } + + public static CustomizedQueryHandlerBuilder builder(@Nullable CustomSearchConfiguration customSearchConfiguration) { + CustomizedQueryHandlerBuilder builder = hiddenBuilder() + .customSearchConfiguration(customSearchConfiguration); + + if (customSearchConfiguration != null) { + builder.queryConfigurations(customSearchConfiguration.getQueryConfigurations().stream() + .map(cfg -> Map.entry(Pattern.compile(cfg.getQueryRegex()), cfg)) + .collect(Collectors.toList())); + } + return builder; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchFieldConfig.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchFieldConfig.java index 3e9f76d425..68a5278563 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchFieldConfig.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchFieldConfig.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import lombok.Builder; import lombok.Getter; +import lombok.experimental.Accessors; import javax.annotation.Nonnull; @@ -16,6 +17,7 @@ import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBu @Builder @Getter +@Accessors(fluent = true) public class SearchFieldConfig { public static final float DEFAULT_BOOST = 1.0f; @@ -61,41 +63,47 @@ public class SearchFieldConfig { @Nonnull private final String fieldName; + @Nonnull + private final String shortName; @Builder.Default private final Float boost = DEFAULT_BOOST; private final String analyzer; private boolean hasKeywordSubfield; private boolean hasDelimitedSubfield; + private boolean isQueryByDefault; + private boolean isDelimitedSubfield; + private boolean 
isKeywordSubfield; public static SearchFieldConfig detectSubFieldType(@Nonnull SearchableFieldSpec fieldSpec) { - final String fieldName = fieldSpec.getSearchableAnnotation().getFieldName(); - final float boost = (float) fieldSpec.getSearchableAnnotation().getBoostScore(); - final SearchableAnnotation.FieldType fieldType = fieldSpec.getSearchableAnnotation().getFieldType(); - return detectSubFieldType(fieldName, boost, fieldType); + final SearchableAnnotation searchableAnnotation = fieldSpec.getSearchableAnnotation(); + final String fieldName = searchableAnnotation.getFieldName(); + final float boost = (float) searchableAnnotation.getBoostScore(); + final SearchableAnnotation.FieldType fieldType = searchableAnnotation.getFieldType(); + return detectSubFieldType(fieldName, boost, fieldType, searchableAnnotation.isQueryByDefault()); } public static SearchFieldConfig detectSubFieldType(String fieldName, - SearchableAnnotation.FieldType fieldType) { - return detectSubFieldType(fieldName, DEFAULT_BOOST, fieldType); + SearchableAnnotation.FieldType fieldType, + boolean isQueryByDefault) { + return detectSubFieldType(fieldName, DEFAULT_BOOST, fieldType, isQueryByDefault); } - public static SearchFieldConfig detectSubFieldType(String fieldName, float boost, - SearchableAnnotation.FieldType fieldType) { + public static SearchFieldConfig detectSubFieldType(String fieldName, + float boost, + SearchableAnnotation.FieldType fieldType, + boolean isQueryByDefault) { return SearchFieldConfig.builder() .fieldName(fieldName) .boost(boost) .analyzer(getAnalyzer(fieldName, fieldType)) .hasKeywordSubfield(hasKeywordSubfield(fieldName, fieldType)) .hasDelimitedSubfield(hasDelimitedSubfield(fieldName, fieldType)) + .isQueryByDefault(isQueryByDefault) .build(); } - public boolean hasDelimitedSubfield() { - return isHasDelimitedSubfield(); - } - - public boolean hasKeywordSubfield() { - return isHasKeywordSubfield(); + public boolean isKeyword() { + return 
KEYWORD_ANALYZER.equals(analyzer()) || isKeyword(fieldName()); } private static boolean hasDelimitedSubfield(String fieldName, SearchableAnnotation.FieldType fieldType) { @@ -108,8 +116,8 @@ public class SearchFieldConfig { && (TYPES_WITH_DELIMITED_SUBFIELD.contains(fieldType) // if delimited then also has keyword || TYPES_WITH_KEYWORD_SUBFIELD.contains(fieldType)); } - private static boolean isKeyword(String fieldName, SearchableAnnotation.FieldType fieldType) { - return fieldName.equals(".keyword") + private static boolean isKeyword(String fieldName) { + return fieldName.endsWith(".keyword") || KEYWORD_FIELDS.contains(fieldName); } @@ -118,7 +126,7 @@ public class SearchFieldConfig { if (TYPES_WITH_BROWSE_PATH.contains(fieldType)) { return BROWSE_PATH_HIERARCHY_ANALYZER; // sub fields - } else if (isKeyword(fieldName, fieldType)) { + } else if (isKeyword(fieldName)) { return KEYWORD_ANALYZER; } else if (fieldName.endsWith(".delimited")) { return TEXT_SEARCH_ANALYZER; @@ -131,4 +139,14 @@ public class SearchFieldConfig { throw new IllegalStateException(String.format("Unknown analyzer for fieldName: %s, fieldType: %s", fieldName, fieldType)); } } + + public static class SearchFieldConfigBuilder { + public SearchFieldConfigBuilder fieldName(@Nonnull String fieldName) { + this.fieldName = fieldName; + isDelimitedSubfield(fieldName.endsWith(".delimited")); + isKeywordSubfield(fieldName.endsWith(".keyword")); + shortName(fieldName.split("[.]")[0]); + return this; + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 9b79521a2f..1f54d3bbbf 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -3,6 +3,8 
@@ package com.linkedin.metadata.search.elasticsearch.query.request; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchScoreAnnotation; @@ -17,6 +19,7 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import com.linkedin.metadata.search.utils.ESUtils; import org.elasticsearch.common.lucene.search.function.CombineFunction; @@ -41,33 +44,40 @@ public class SearchQueryBuilder { private final ExactMatchConfiguration exactMatchConfiguration; private final PartialConfiguration partialConfiguration; - public SearchQueryBuilder(@Nonnull SearchConfiguration searchConfiguration) { + private final CustomizedQueryHandler customizedQueryHandler; + + public SearchQueryBuilder(@Nonnull SearchConfiguration searchConfiguration, + @Nullable CustomSearchConfiguration customSearchConfiguration) { this.exactMatchConfiguration = searchConfiguration.getExactMatch(); this.partialConfiguration = searchConfiguration.getPartial(); + this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); } public QueryBuilder buildQuery(@Nonnull List entitySpecs, @Nonnull String query, boolean fulltext) { - final QueryBuilder queryBuilder = buildInternalQuery(entitySpecs, query, fulltext); - return QueryBuilders.functionScoreQuery(queryBuilder, buildScoreFunctions(entitySpecs)) - .scoreMode(FunctionScoreQuery.ScoreMode.AVG) // Average score functions - .boostMode(CombineFunction.MULTIPLY); // Multiply score 
function with the score from query + QueryConfiguration customQueryConfig = customizedQueryHandler.lookupQueryConfig(query).orElse(null); + + final QueryBuilder queryBuilder = buildInternalQuery(customQueryConfig, entitySpecs, query, fulltext); + return buildScoreFunctions(customQueryConfig, entitySpecs, queryBuilder); } /** * Constructs the search query. + * @param customQueryConfig custom configuration * @param entitySpecs entities being searched * @param query search string * @param fulltext use fulltext queries * @return query builder */ - private QueryBuilder buildInternalQuery(@Nonnull List entitySpecs, @Nonnull String query, boolean fulltext) { - BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); + private QueryBuilder buildInternalQuery(@Nullable QueryConfiguration customQueryConfig, @Nonnull List entitySpecs, + @Nonnull String query, boolean fulltext) { + final String sanitizedQuery = query.replaceFirst("^:+", ""); + final BoolQueryBuilder finalQuery = Optional.ofNullable(customQueryConfig) + .flatMap(cqc -> cqc.boolQueryBuilder(sanitizedQuery)) + .orElse(QueryBuilders.boolQuery()); if (fulltext && !query.startsWith(STRUCTURED_QUERY_PREFIX)) { - final String sanitizedQuery = query.replaceFirst("^:+", ""); - - getSimpleQuery(entitySpecs, sanitizedQuery).ifPresent(finalQuery::should); - getPrefixAndExactMatchQuery(entitySpecs, sanitizedQuery).ifPresent(finalQuery::should); + getSimpleQuery(customQueryConfig, entitySpecs, sanitizedQuery).ifPresent(finalQuery::should); + getPrefixAndExactMatchQuery(customQueryConfig, entitySpecs, sanitizedQuery).ifPresent(finalQuery::should); } else { final String withoutQueryPrefix = query.startsWith(STRUCTURED_QUERY_PREFIX) ? 
query.substring(STRUCTURED_QUERY_PREFIX.length()) : query; @@ -77,10 +87,10 @@ public class SearchQueryBuilder { .map(this::getStandardFields) .flatMap(Set::stream) .distinct() - .forEach(cfg -> queryBuilder.field(cfg.getFieldName(), cfg.getBoost())); + .forEach(cfg -> queryBuilder.field(cfg.fieldName(), cfg.boost())); finalQuery.should(queryBuilder); if (exactMatchConfiguration.isEnableStructured()) { - getPrefixAndExactMatchQuery(entitySpecs, withoutQueryPrefix).ifPresent(finalQuery::should); + getPrefixAndExactMatchQuery(null, entitySpecs, withoutQueryPrefix).ifPresent(finalQuery::should); } } @@ -93,9 +103,9 @@ public class SearchQueryBuilder { // Always present final float urnBoost = Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); - fields.add(SearchFieldConfig.detectSubFieldType("urn", urnBoost, SearchableAnnotation.FieldType.URN)); + fields.add(SearchFieldConfig.detectSubFieldType("urn", urnBoost, SearchableAnnotation.FieldType.URN, true)); fields.add(SearchFieldConfig.detectSubFieldType("urn.delimited", urnBoost * partialConfiguration.getUrnFactor(), - SearchableAnnotation.FieldType.URN)); + SearchableAnnotation.FieldType.URN, true)); List searchableFieldSpecs = entitySpec.getSearchableFieldSpecs(); for (SearchableFieldSpec fieldSpec : searchableFieldSpecs) { @@ -107,9 +117,11 @@ public class SearchQueryBuilder { fields.add(searchFieldConfig); if (SearchFieldConfig.detectSubFieldType(fieldSpec).hasDelimitedSubfield()) { - fields.add(SearchFieldConfig.detectSubFieldType(searchFieldConfig.getFieldName() + ".delimited", - searchFieldConfig.getBoost() * partialConfiguration.getFactor(), - fieldSpec.getSearchableAnnotation().getFieldType())); + final SearchableAnnotation searchableAnnotation = fieldSpec.getSearchableAnnotation(); + + fields.add(SearchFieldConfig.detectSubFieldType(searchFieldConfig.fieldName() + ".delimited", + searchFieldConfig.boost() * partialConfiguration.getFactor(), + searchableAnnotation.getFieldType(), 
searchableAnnotation.isQueryByDefault())); } } @@ -124,24 +136,34 @@ public class SearchQueryBuilder { return Stream.of("\"", "'").anyMatch(query::contains); } - private Optional getSimpleQuery(List entitySpecs, String sanitizedQuery) { + private Optional getSimpleQuery(@Nullable QueryConfiguration customQueryConfig, + List entitySpecs, + String sanitizedQuery) { Optional result = Optional.empty(); - if (!isQuoted(sanitizedQuery) || !exactMatchConfiguration.isExclusive()) { + final boolean executeSimpleQuery; + if (customQueryConfig != null) { + executeSimpleQuery = customQueryConfig.isSimpleQuery(); + } else { + executeSimpleQuery = !isQuoted(sanitizedQuery) || !exactMatchConfiguration.isExclusive(); + } + + if (executeSimpleQuery) { BoolQueryBuilder simplePerField = QueryBuilders.boolQuery(); // Simple query string does not use per field analyzers // Group the fields by analyzer Map> analyzerGroup = entitySpecs.stream() .map(this::getStandardFields) .flatMap(Set::stream) - .collect(Collectors.groupingBy(SearchFieldConfig::getAnalyzer)); + .filter(SearchFieldConfig::isQueryByDefault) + .collect(Collectors.groupingBy(SearchFieldConfig::analyzer)); analyzerGroup.keySet().stream().sorted().forEach(analyzer -> { List fieldConfigs = analyzerGroup.get(analyzer); SimpleQueryStringBuilder simpleBuilder = QueryBuilders.simpleQueryStringQuery(sanitizedQuery); simpleBuilder.analyzer(analyzer); simpleBuilder.defaultOperator(Operator.AND); - fieldConfigs.forEach(cfg -> simpleBuilder.field(cfg.getFieldName(), cfg.getBoost())); + fieldConfigs.forEach(cfg -> simpleBuilder.field(cfg.fieldName(), cfg.boost())); simplePerField.should(simpleBuilder); }); @@ -151,62 +173,77 @@ public class SearchQueryBuilder { return result; } - private Optional getPrefixAndExactMatchQuery(@Nonnull List entitySpecs, String query) { + private Optional getPrefixAndExactMatchQuery(@Nullable QueryConfiguration customQueryConfig, + @Nonnull List entitySpecs, + String query) { + + final boolean 
isPrefixQuery = customQueryConfig == null ? exactMatchConfiguration.isWithPrefix() : customQueryConfig.isPrefixMatchQuery(); + final boolean isExactQuery = customQueryConfig == null || customQueryConfig.isExactMatchQuery(); + BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); String unquotedQuery = unquote(query); - // Exact match case-sensitive - finalQuery.should(QueryBuilders.termQuery("urn", unquotedQuery) - .boost(Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")) - * exactMatchConfiguration.getExactFactor()) - .queryName("urn")); - // Exact match case-insensitive - finalQuery.should(QueryBuilders.termQuery("urn", unquotedQuery) - .caseInsensitive(true) - .boost(Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")) - * exactMatchConfiguration.getExactFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) - .queryName("urn")); - entitySpecs.stream() - .map(EntitySpec::getSearchableFieldSpecs) - .flatMap(List::stream) - .map(SearchableFieldSpec::getSearchableAnnotation) - .filter(SearchableAnnotation::isQueryByDefault) - .filter(SearchableAnnotation::isEnableAutocomplete) // Proxy for identifying likely exact match fields - .forEach(srchAnnotation -> { - boolean hasDelimited = SearchFieldConfig.detectSubFieldType(srchAnnotation.getFieldName(), - srchAnnotation.getFieldType()).hasDelimitedSubfield(); + .map(this::getStandardFields) + .flatMap(Set::stream) + .filter(SearchFieldConfig::isQueryByDefault) + .forEach(searchFieldConfig -> { - if (hasDelimited && exactMatchConfiguration.isWithPrefix()) { - finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(srchAnnotation.getFieldName() + ".delimited", query) - .boost((float) srchAnnotation.getBoostScore() * exactMatchConfiguration.getCaseSensitivityFactor()) - .queryName(srchAnnotation.getFieldName())); // less than exact + if (searchFieldConfig.isDelimitedSubfield() && isPrefixQuery) { + 
finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(searchFieldConfig.fieldName(), query) + .boost(searchFieldConfig.boost() + * exactMatchConfiguration.getPrefixFactor() + * exactMatchConfiguration.getCaseSensitivityFactor()) + .queryName(searchFieldConfig.shortName())); // less than exact } - // Exact match case-sensitive - finalQuery.should(QueryBuilders - .termQuery(ESUtils.toKeywordField(srchAnnotation.getFieldName(), false), unquotedQuery) - .boost((float) srchAnnotation.getBoostScore() * exactMatchConfiguration.getExactFactor()) - .queryName(ESUtils.toKeywordField(srchAnnotation.getFieldName(), false))); - // Exact match case-insensitive - finalQuery.should(QueryBuilders - .termQuery(ESUtils.toKeywordField(srchAnnotation.getFieldName(), false), unquotedQuery) - .caseInsensitive(true) - .boost((float) srchAnnotation.getBoostScore() - * exactMatchConfiguration.getExactFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) - .queryName(ESUtils.toKeywordField(srchAnnotation.getFieldName(), false))); + if (searchFieldConfig.isKeyword() && isExactQuery) { + // It is important to use the subfield .keyword (it uses a different normalizer) + // The non-.keyword field removes case information + + // Exact match case-sensitive + finalQuery.should(QueryBuilders + .termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) + .caseInsensitive(false) + .boost(searchFieldConfig.boost() + * exactMatchConfiguration.getExactFactor()) + .queryName(searchFieldConfig.shortName())); + + // Exact match case-insensitive + finalQuery.should(QueryBuilders + .termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) + .caseInsensitive(true) + .boost(searchFieldConfig.boost() + * exactMatchConfiguration.getExactFactor() + * exactMatchConfiguration.getCaseSensitivityFactor()) + .queryName(searchFieldConfig.fieldName())); + } }); return finalQuery.should().size() > 0 ? 
Optional.of(finalQuery) : Optional.empty(); } - private static FunctionScoreQueryBuilder.FilterFunctionBuilder[] buildScoreFunctions(@Nonnull List entitySpecs) { + private FunctionScoreQueryBuilder buildScoreFunctions(@Nullable QueryConfiguration customQueryConfig, + @Nonnull List entitySpecs, + @Nonnull QueryBuilder queryBuilder) { + + if (customQueryConfig != null) { + // Prefer configuration function scoring over annotation scoring + return customQueryConfig.functionScoreQueryBuilder(queryBuilder); + } else { + return QueryBuilders.functionScoreQuery(queryBuilder, buildAnnotationScoreFunctions(entitySpecs)) + .scoreMode(FunctionScoreQuery.ScoreMode.AVG) // Average score functions + .boostMode(CombineFunction.MULTIPLY); // Multiply score function with the score from query; + } + } + + private static FunctionScoreQueryBuilder.FilterFunctionBuilder[] buildAnnotationScoreFunctions(@Nonnull List entitySpecs) { List finalScoreFunctions = new ArrayList<>(); + // Add a default weight of 1.0 to make sure the score function is larger than 1 finalScoreFunctions.add( - new FunctionScoreQueryBuilder.FilterFunctionBuilder(ScoreFunctionBuilders.weightFactorFunction(1.0f))); + new FunctionScoreQueryBuilder.FilterFunctionBuilder(ScoreFunctionBuilders.weightFactorFunction(1.0f))); + entitySpecs.stream() .map(EntitySpec::getSearchableFieldSpecs) .flatMap(List::stream) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 7dd1d24f5d..5b2706d6f2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; import 
com.linkedin.data.template.LongMap; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -94,11 +95,13 @@ public class SearchRequestHandler { private final SearchQueryBuilder _searchQueryBuilder; - private SearchRequestHandler(@Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs) { - this(ImmutableList.of(entitySpec), configs); + private SearchRequestHandler(@Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, + @Nullable CustomSearchConfiguration customSearchConfiguration) { + this(ImmutableList.of(entitySpec), configs, customSearchConfiguration); } - private SearchRequestHandler(@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs) { + private SearchRequestHandler(@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, + @Nullable CustomSearchConfiguration customSearchConfiguration) { _entitySpecs = entitySpecs; List annotations = getSearchableAnnotations(); _facetFields = getFacetFields(annotations); @@ -107,16 +110,20 @@ public class SearchRequestHandler { .filter(SearchableAnnotation::isAddToFilters) .collect(Collectors.toMap(SearchableAnnotation::getFieldName, SearchableAnnotation::getFilterName, mapMerger())); _highlights = getHighlights(); - _searchQueryBuilder = new SearchQueryBuilder(configs); + _searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); _configs = configs; } - public static SearchRequestHandler getBuilder(@Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs) { - return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent(ImmutableList.of(entitySpec), k -> new SearchRequestHandler(entitySpec, configs)); + public static SearchRequestHandler getBuilder(@Nonnull EntitySpec entitySpec, @Nonnull 
SearchConfiguration configs, + @Nullable CustomSearchConfiguration customSearchConfiguration) { + return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( + ImmutableList.of(entitySpec), k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); } - public static SearchRequestHandler getBuilder(@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs) { - return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent(ImmutableList.copyOf(entitySpecs), k -> new SearchRequestHandler(entitySpecs, configs)); + public static SearchRequestHandler getBuilder(@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, + @Nullable CustomSearchConfiguration customSearchConfiguration) { + return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( + ImmutableList.copyOf(entitySpecs), k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); } private List getSearchableAnnotations() { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java index 738e63fdab..654c485787 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java @@ -1,9 +1,12 @@ package com.linkedin.metadata; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.client.JavaEntityClient; +import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; @@ -95,9 +98,12 @@ public class ESSampleDataFixture { 
@Qualifier("entityRegistry") EntityRegistry entityRegistry, @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders, @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention - ) { + ) throws IOException { + CustomConfiguration customConfiguration = new CustomConfiguration(true, "search_config_fixture_test.yml"); + CustomSearchConfiguration customSearchConfiguration = customConfiguration.customSearchConfiguration(new YAMLMapper()); + ESSearchDAO searchDAO = new ESSearchDAO(entityRegistry, _searchClient, indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration); ESBrowseDAO browseDAO = new ESBrowseDAO(entityRegistry, _searchClient, indexConvention); ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java index 51fa85cb17..09ab67fb6f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java @@ -106,7 +106,7 @@ public class ESSearchLineageFixture { @Qualifier("searchLineageIndexConvention") IndexConvention indexConvention ) { ESSearchDAO searchDAO = new ESSearchDAO(entityRegistry, _searchClient, indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); ESBrowseDAO browseDAO = new ESBrowseDAO(entityRegistry, _searchClient, indexConvention); ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, 
searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java index 7a68c9bc42..13700f7fcc 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java @@ -156,7 +156,7 @@ public class LineageSearchServiceTest extends AbstractTestNGSpringContextTests { new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, _indexConvention, _settingsBuilder); ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention); ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java index cf5c186b93..42da8970bb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java @@ -117,7 +117,7 @@ public class SearchServiceTest extends AbstractTestNGSpringContextTests { new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, _indexConvention, _settingsBuilder); ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); ESBrowseDAO browseDAO = new 
ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention); ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, _bulkProcessor, 1); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java index 7bac79c733..57646f1160 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java @@ -81,7 +81,7 @@ public class ElasticSearchServiceTest extends AbstractTestNGSpringContextTests { EntityIndexBuilders indexBuilders = new EntityIndexBuilders(_esIndexBuilder, _entityRegistry, _indexConvention, _settingsBuilder); ESSearchDAO searchDAO = new ESSearchDAO(_entityRegistry, _searchClient, _indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, null); ESBrowseDAO browseDAO = new ESBrowseDAO(_entityRegistry, _searchClient, _indexConvention); ESWriteDAO writeDAO = new ESWriteDAO(_entityRegistry, _searchClient, _indexConvention, _bulkProcessor, 1); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java index 273442206b..37cdaa56fb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java @@ -119,8 +119,8 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { for (SearchableFieldSpec fieldSpec : entitySpec.getSearchableFieldSpecs()) { SearchFieldConfig test = 
SearchFieldConfig.detectSubFieldType(fieldSpec); - if (!test.getFieldName().contains(".")) { - Map actual = mappings.get(test.getFieldName()); + if (!test.fieldName().contains(".")) { + Map actual = mappings.get(test.fieldName()); final String expectedAnalyzer; if (actual.get("search_analyzer") != null) { @@ -131,36 +131,36 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { expectedAnalyzer = "keyword"; } - assertEquals(test.getAnalyzer(), expectedAnalyzer, + assertEquals(test.analyzer(), expectedAnalyzer, String.format("Expected search analyzer to match for entity: `%s`field: `%s`", - entitySpec.getName(), test.getFieldName())); + entitySpec.getName(), test.fieldName())); if (test.hasDelimitedSubfield()) { assertTrue(((Map>) actual.get("fields")).containsKey("delimited"), String.format("Expected entity: `%s` field to have .delimited subfield: `%s`", - entitySpec.getName(), test.getFieldName())); + entitySpec.getName(), test.fieldName())); } else { boolean nosubfield = !actual.containsKey("fields") || !((Map>) actual.get("fields")).containsKey("delimited"); assertTrue(nosubfield, String.format("Expected entity: `%s` field to NOT have .delimited subfield: `%s`", - entitySpec.getName(), test.getFieldName())); + entitySpec.getName(), test.fieldName())); } if (test.hasKeywordSubfield()) { assertTrue(((Map>) actual.get("fields")).containsKey("keyword"), String.format("Expected entity: `%s` field to have .keyword subfield: `%s`", - entitySpec.getName(), test.getFieldName())); + entitySpec.getName(), test.fieldName())); } else { boolean nosubfield = !actual.containsKey("fields") || !((Map>) actual.get("fields")).containsKey("keyword"); assertTrue(nosubfield, String.format("Expected entity: `%s` field to NOT have .keyword subfield: `%s`", - entitySpec.getName(), test.getFieldName())); + entitySpec.getName(), test.fieldName())); } } else { // this is a subfield therefore cannot have a subfield assertFalse(test.hasKeywordSubfield()); 
assertFalse(test.hasDelimitedSubfield()); - String[] fieldAndSubfield = test.getFieldName().split("[.]", 2); + String[] fieldAndSubfield = test.fieldName().split("[.]", 2); Map actualParent = mappings.get(fieldAndSubfield[0]); Map actualSubfield = ((Map>) actualParent.get("fields")).get(fieldAndSubfield[0]); @@ -168,8 +168,8 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { String expectedAnalyzer = actualSubfield.get("search_analyzer") != null ? (String) actualSubfield.get("search_analyzer") : "keyword"; - assertEquals(test.getAnalyzer(), expectedAnalyzer, - String.format("Expected search analyzer to match for field `%s`", test.getFieldName())); + assertEquals(test.analyzer(), expectedAnalyzer, + String.format("Expected search analyzer to match for field `%s`", test.fieldName())); } } } @@ -195,7 +195,7 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { final SearchResult result = search(searchService, "test"); Map expectedTypes = Map.of( - "dataset", 10, + "dataset", 13, "chart", 0, "container", 1, "dashboard", 0, @@ -1132,6 +1132,7 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { "Expected exact match and 1st position"); } + // Note: This test can fail if not using .keyword subfields (check for possible query builder regression) @Test public void testPrefixVsExactCaseSensitivity() { List insensitiveExactMatches = List.of("testExactMatchCase", "testexactmatchcase", "TESTEXACTMATCHCASE"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java new file mode 100644 index 0000000000..f7cbf53a77 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java @@ -0,0 +1,178 @@ +package 
com.linkedin.metadata.search.elasticsearch.query.request; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.linkedin.metadata.config.search.CustomConfiguration; +import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import org.elasticsearch.common.lucene.search.function.CombineFunction; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +public class CustomizedQueryHandlerTest { + public static final ObjectMapper TEST_MAPPER = new YAMLMapper(); + private static final CustomSearchConfiguration TEST_CONFIG; + static { + try { + TEST_CONFIG = new CustomConfiguration(true, "search_config_test.yml") + .customSearchConfiguration(TEST_MAPPER); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + private static final List EXPECTED_CONFIGURATION = List.of( + QueryConfiguration.builder() + .queryRegex("[*]|") + .simpleQuery(false) + .exactMatchQuery(false) + .prefixMatchQuery(false) + .functionScore(Map.of("score_mode", "avg", "boost_mode", "multiply", + "functions", List.of( + Map.of( + "weight", 1, + "filter", Map.of("match_all", Map.of())), + Map.of( + "weight", 0.5, + "filter", Map.of("term", Map.of( + "materialized", Map.of("value", true) + ))), + Map.of( + "weight", 0.5, + 
"filter", Map.of("term", Map.of( + "deprecated", Map.of("value", true) + ))) + ))) + .build(), + QueryConfiguration.builder() + .queryRegex(".*") + .simpleQuery(true) + .exactMatchQuery(true) + .prefixMatchQuery(true) + .boolQuery(BoolQueryConfiguration.builder() + .must(List.of( + Map.of("term", Map.of("name", "{{query_string}}")) + )) + .build()) + .functionScore(Map.of("score_mode", "avg", "boost_mode", "multiply", + "functions", List.of( + Map.of( + "weight", 1, + "filter", Map.of("match_all", Map.of())), + Map.of( + "weight", 0.5, + "filter", Map.of("term", Map.of( + "materialized", Map.of("value", true) + ))), + Map.of( + "weight", 1.5, + "filter", Map.of("term", Map.of( + "deprecated", Map.of("value", false) + ))) + ))) + .build() + ); + + + @Test + public void configParsingTest() { + assertNotNull(TEST_CONFIG); + assertEquals(TEST_CONFIG.getQueryConfigurations(), EXPECTED_CONFIGURATION); + } + + @Test + public void customizedQueryHandlerInitTest() { + CustomizedQueryHandler test = CustomizedQueryHandler.builder(TEST_CONFIG).build(); + + assertEquals(test.getQueryConfigurations().stream().map(e -> e.getKey().toString()).collect(Collectors.toList()), + List.of("[*]|", ".*")); + + assertEquals(test.getQueryConfigurations().stream() + .map(e -> Map.entry(e.getKey().toString(), e.getValue())) + .collect(Collectors.toList()), + EXPECTED_CONFIGURATION.stream() + .map(cfg -> Map.entry(cfg.getQueryRegex(), cfg)) + .collect(Collectors.toList())); + } + + @Test + public void patternMatchTest() { + CustomizedQueryHandler test = CustomizedQueryHandler.builder(TEST_CONFIG).build(); + + for (String selectAllQuery: List.of("*", "")) { + QueryConfiguration actual = test.lookupQueryConfig(selectAllQuery).get(); + assertEquals(actual, EXPECTED_CONFIGURATION.get(0), String.format("Failed to match: `%s`", selectAllQuery)); + } + + for (String otherQuery: List.of("foo", "bar")) { + QueryConfiguration actual = test.lookupQueryConfig(otherQuery).get(); + assertEquals(actual, 
EXPECTED_CONFIGURATION.get(1)); + } + } + + @Test + public void functionScoreQueryBuilderTest() { + CustomizedQueryHandler test = CustomizedQueryHandler.builder(TEST_CONFIG).build(); + MatchAllQueryBuilder inputQuery = QueryBuilders.matchAllQuery(); + + /* + * Test select star + */ + FunctionScoreQueryBuilder selectStarTest = test.lookupQueryConfig("*").get().functionScoreQueryBuilder(inputQuery); + + FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedSelectStarScoreFunctions = { + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.weightFactorFunction(1f) + ), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("materialized", true), + ScoreFunctionBuilders.weightFactorFunction(0.5f) + ), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("deprecated", true), + ScoreFunctionBuilders.weightFactorFunction(0.5f) + ) + }; + FunctionScoreQueryBuilder expectedSelectStar = new FunctionScoreQueryBuilder(expectedSelectStarScoreFunctions) + .scoreMode(FunctionScoreQuery.ScoreMode.AVG) + .boostMode(CombineFunction.MULTIPLY); + + assertEquals(selectStarTest, expectedSelectStar); + + /* + * Test default (non-select start) + */ + FunctionScoreQueryBuilder defaultTest = test.lookupQueryConfig("foobar").get().functionScoreQueryBuilder(inputQuery); + + FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedDefaultScoreFunctions = { + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.weightFactorFunction(1f) + ), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("materialized", true), + ScoreFunctionBuilders.weightFactorFunction(0.5f) + ), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("deprecated", false), + ScoreFunctionBuilders.weightFactorFunction(1.5f) + ) + }; + FunctionScoreQueryBuilder expectedDefault = new FunctionScoreQueryBuilder(expectedDefaultScoreFunctions) + 
.scoreMode(FunctionScoreQuery.ScoreMode.AVG) + .boostMode(CombineFunction.MULTIPLY); + + assertEquals(defaultTest, expectedDefault); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java index 5733a7a37a..27c8127a29 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java @@ -1,16 +1,22 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; + +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.util.Pair; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryStringQueryBuilder; @@ -47,7 +53,7 @@ public class SearchQueryBuilderTest { testQueryConfig.setExactMatch(exactMatchConfiguration); testQueryConfig.setPartial(partialConfiguration); } - public static final SearchQueryBuilder TEST_BUILDER = new SearchQueryBuilder(testQueryConfig); + public static final SearchQueryBuilder TEST_BUILDER = new 
SearchQueryBuilder(testQueryConfig, null); @Test public void testQueryBuilderFulltext() { @@ -110,13 +116,15 @@ public class SearchQueryBuilderTest { } }).collect(Collectors.toList()); - assertEquals(prefixFieldWeights, List.of( + assertEquals(prefixFieldWeights.size(), 22); + + List.of( Pair.of("urn", 100.0f), Pair.of("urn", 70.0f), - Pair.of("keyPart1.delimited", 7.0f), + Pair.of("keyPart1.delimited", 16.8f), Pair.of("keyPart1.keyword", 100.0f), Pair.of("keyPart1.keyword", 70.0f) - )); + ).forEach(p -> assertTrue(prefixFieldWeights.contains(p), "Missing: " + p)); // Validate scorer FunctionScoreQueryBuilder.FilterFunctionBuilder[] scoringFunctions = result.filterFunctionBuilders(); @@ -147,4 +155,87 @@ public class SearchQueryBuilderTest { FunctionScoreQueryBuilder.FilterFunctionBuilder[] scoringFunctions = result.filterFunctionBuilders(); assertEquals(scoringFunctions.length, 3); } + + private static final SearchQueryBuilder TEST_CUSTOM_BUILDER; + static { + try { + CustomSearchConfiguration customSearchConfiguration = new CustomConfiguration( + true, "search_config_builder_test.yml").customSearchConfiguration(new YAMLMapper()); + TEST_CUSTOM_BUILDER = new SearchQueryBuilder(testQueryConfig, customSearchConfiguration); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test + public void testCustomSelectAll() { + for (String triggerQuery : List.of("*", "")) { + FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER + .buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); + + BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); + List shouldQueries = mainQuery.should(); + assertEquals(shouldQueries.size(), 0); + } + } + + @Test + public void testCustomExactMatch() { + for (String triggerQuery : List.of("test_table", "'single quoted'", "\"double quoted\"")) { + FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER + 
.buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); + + BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); + List shouldQueries = mainQuery.should(); + assertEquals(shouldQueries.size(), 1, String.format("Expected query for `%s`", triggerQuery)); + + BoolQueryBuilder boolPrefixQuery = (BoolQueryBuilder) shouldQueries.get(0); + assertTrue(boolPrefixQuery.should().size() > 0); + + List queries = boolPrefixQuery.should().stream().map(prefixQuery -> { + if (prefixQuery instanceof MatchPhrasePrefixQueryBuilder) { + return (MatchPhrasePrefixQueryBuilder) prefixQuery; + } else { + // exact + return (TermQueryBuilder) prefixQuery; + } + }).collect(Collectors.toList()); + + assertFalse(queries.isEmpty(), "Expected queries with specific types"); + } + } + + @Test + public void testCustomDefault() { + for (String triggerQuery : List.of("foo", "bar", "foo\"bar", "foo:bar")) { + FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER + .buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); + + BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); + List shouldQueries = mainQuery.should(); + assertEquals(shouldQueries.size(), 3); + + List queries = mainQuery.should().stream().map(query -> { + if (query instanceof SimpleQueryStringBuilder) { + return (SimpleQueryStringBuilder) query; + } else if (query instanceof MatchAllQueryBuilder) { + // custom + return (MatchAllQueryBuilder) query; + } else { + // exact + return (BoolQueryBuilder) query; + } + }).collect(Collectors.toList()); + + assertEquals(queries.size(), 3, "Expected queries with specific types"); + + // validate query injection + List mustQueries = mainQuery.must(); + assertEquals(mustQueries.size(), 1); + TermQueryBuilder termQueryBuilder = (TermQueryBuilder) mainQuery.must().get(0); + + assertEquals(termQueryBuilder.fieldName(), "fieldName"); + assertEquals(termQueryBuilder.value().toString(), triggerQuery); 
+ } + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java index b63e8d6fe2..228c1bff5b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java @@ -74,7 +74,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { @Test public void testDatasetFieldsAndHighlights() { EntitySpec entitySpec = entityRegistry.getEntitySpec("dataset"); - SearchRequestHandler datasetHandler = SearchRequestHandler.getBuilder(entitySpec, testQueryConfig); + SearchRequestHandler datasetHandler = SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -89,7 +89,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { @Test public void testSearchRequestHandler() { - SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig); + SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = requestHandler.getSearchRequest("testQuery", null, null, 0, 10, new SearchFlags().setFulltext(false)); SearchSourceBuilder sourceBuilder = searchRequest.source(); @@ -118,7 +118,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { @Test public void testFilteredSearch() { - final SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig); + final SearchRequestHandler requestHandler = 
SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); final BoolQueryBuilder testQuery = constructFilterQuery(requestHandler, false); @@ -398,7 +398,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { )); final SearchRequestHandler requestHandler = SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), testQueryConfig); + TestEntitySpecBuilder.getSpec(), testQueryConfig, null); return (BoolQueryBuilder) requestHandler .getSearchRequest("", filter, null, 0, 10, new SearchFlags().setFulltext(false)) diff --git a/metadata-io/src/test/resources/elasticsearch/sample_data/datasetindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/sample_data/datasetindex_v2.json.gz index dd2f3e08747e5c4bd3c5268bde0950cd22ed7af3..3c38926a33ee7ea877bc76cc29d59e2e8baa0e5b 100644 GIT binary patch literal 3688 zcmV-u4wvyCiwFp1{6=H|17u-zVRL14X>Md?cwcrhE^2dcZUF6FQFGfi5`Mp5fzivg zGZJMfRJC0JF%9}#6tJ`$5g5GHf{B-FQlaL^oGR&w$926y%vkVr$exO zq2Pv&)7K=DDrQu3*$OaXdcOdS+oFdQED$H*rHdVhq65zz?skCXgu6fkZ6qK1;3l*>E~L#(SP-$BMTo3C zV9FC%Km(D4>QF+&96nY?2y~WC|D@lydp-EC(;wLVGyAlso$gVC$Gqg*j~#lHcNjSg z|IagS7~Kv(YUE?INz5pOiE>(DDqyfMtpKyBL>@}OD1qXD!bMr`?FNe#3lK$zK6^;8 zIMCHcrgvqq*y)lZ>-iCa#On2iTP$kbcw7=K9|3YQq>?*?hd2n@YJ?6&q6j4!<2C`Z zd*=K2S3(H^N9GW8$cQ>hbAV@Tm^b;eBzfH0!fGe0&^*z20$yU~VA_+Fg-I|3Hexi3 zm^p7BY^Qj@>Vz&&9M($%jbP$ z51l9hDV+{dK^z9*doS!U6A(J>Kn%Ih)zs!e1ToNZs68|()jj3P##l2On#Msnm5+cr z_mX(v{{ECum^tW}5yPK2F~lse9dVP`glmjI7c`_U?YzIv$WRQT#i*WC>ym(NDPNZ- zWc<5T9d(y?)F6q-xV9c~>=XI4WB>lTV}_s5(8ag6>=SoH+6ZQBMPbyT)hHGL0hpe2 zP6nOcGmCs@dQl(>nFv3DnH?m7lp~@s%;-ww0u)Ic>N^Qo4k(69-7^-FZ`jc4!MuY4 zyrr?#J5yd;cZDefOfVu;vGB<_7D)uT24U3kpcJO1cxsW~|MlVfBSE(uaa+nOYppAhM`*6%C&unz47>Q3ffMr3<&cjH%zhwE zs%nD86&NZ71F*?TvSu{kk!b##7lQOLXBJXR0 z(l%~CN+1CTwB3;?=2#{c*Kad&M&H`Bn`%Est&=hh4Bkw6P8#6SIcR;o&PRya(8Js*j?Y*!v5}0fN+Ayh% zrRDe{acSa$rN%;xeMTb=Ya9{Spdz1ux_Qo{M`YXFTZ<#Ve57gx=vD}@-WV-i_?*~h zgC`#oi=A<~@XHm#rb`dOHK69lZq^@$C?b(^Z$kI@!| 
zMHmkUFplXjy-iKHl*tw?R_JH{y|ri*_6dClEP^UO#JZJ?K1&N9zkMCQeI37j9lw1Y zzkMCQeI37j?d026<0*FDFH1A6se9NQn|AONQ*G^ZwR#TDg|6O}W-_*Eu#K5^2Xp7l z<{NA=+k;JG4+*We`wQhcLAQ(Os?GWHaAro!wZUBCy`M?5)@?CoPK(PZ)<5xd zIpCyuetb6BSo*kUQ}zs^l_y)akI!YhzaIq7ectxH5i--g-iHs%2W|G&n8YtbkGaKr z523lsjn)+>Rf zJRYn(9;`ebtUTV}aJ<3cc!R_728Tzx!C}Q0(NYv)5%?pE^Z9kIjw61S7pM{T%R6RzW!MqXS0+riy!IS?z+aYsjYw0lj1$Y+1W z=huce%3I~}&?xG^jQAaOyriYibVa?FBYMa130ut%X{zTM8~#7Ig3Y!&wSP&&v z?(E5I3)##04~)6;wQv%gB6M)-&5U17&|gI`sEzA0zih3HyPCGO0wSk&f9{Du7Ah-TTI{Dhozg_Ym452v#B)!Ltg;C-&)!wSmkh ztUYp+jp%~R0sy57`^=kcl{LF$@#Hq;AqH9M`#P{{mLz3qw!Y#CSs#zkPHXZ(7COQO zqL+ypb)G+BaL|T;vkzMSf(O@7hEX(`=miaatS0ZV=Mxmb1Y2{79@ z1QRCF#xA!o#I;cx?9d>i*JQi*M@Zz^mi;khdAR=AY#ZxRU{||z`k6wXZ&eVYN}B|X zv!pK)QYO5}K)+~%;u~}!H@G#R(&O)GK2p%dO(s%GFsmdwM$e&X*6>1zJI;VeLAGs< zgs0omr{++y(W(Jusxnai@t1O4rlC#V|1p#-eR=IHJbzU-eWN{_j`bj zBxbl8L|!vkLLWr)-%D_&vp761uQEtSCAS(%mRDwl zH`m&K^{d7bv;Z-hooA@@+_E$5soSnd|2J6P*NX@bU)jAg{ROyA`ltJO;P^YhBmE9g zyNIvwr>kD4&=-VW-gQ#o%eh0 zY5(Mu^#<-s#(9XVe$3m#CS=e;T>l}ON+(m1xeC2=)sdJY^xF##D)oN~E-N)ab z{($S-hQ3gTMLFnJaA*KZIoQQQ7$KUywDDzrA-uM;)NvflT&B{l1=2d(AxUnf)fOvC zvdVUY4fY1;@D>Z-M;y1a2NU(uhV7d-*EjhkJ(VQIoC2xml@Al&FRE(Z^U!Q?pH*Lm z0I|?vhU|D{&hg5e) zdvIb84(mc4#Jrq_BAf7pXCf9FMeRwkcP=^qzx$byU?HNT<^^%h}LhZa=Z&@ zQJF+8%m0wivgA+KH%t6dRtp|7uuuAS|EogMH``acpun2Hu$N51kW+x1oWnFlS%FPW zfzJ8n@2@Uy-oCoI`0nD>^_v4T#i9b6ngV8Oll9kGVD4iAl2`1*C@h{=FHn(*?B+eM z{<5^(N{inuQ!CAS*w8pUz+3$(BwJeKhpj&=&ArsXMGNU~gVeVj{2yMd?cwcrhE^2dcZUF6FUvt|w5`Vu>fzivg zGZJM6Yve9g)NyvVrK9h~}Um9c19-vJB zxob8=#D^TaM`9@FEu$l0D@(#|o#j>AN!qUL!`fw}4E zIuap^6gLVuK<0^|J%;|d2rfF-iFFFJfpD3Ps$Tf;EZb61u*0xjP|uPdkx$yAbVkgl7bB%YdERcDS7{BI0+yB|e51FaZjU?G2PuMF zdo5fdgiixLU{cMgkyX^|EhTS{Pfd4V+5pBH%sM+#BPw4p=|nt4Yc$SP6tiX{V!jwN z81(`5B)kte^W1DW)Aly_1=d9%m}cK>cr<;!OIm&B2`Onbxn+@$t<&;culeFPv=5fg z72MKc@*0OyMT}}LTLDHyALf8@m-mpI1)?~(aI+PGGgO8;V0-dGRJ?VDsP6z&Lb$fRA%s%aCr+d`kF)R7@V}~B)9Yzkr z|MQF+Mz_O{8rc}F6EkvQqL@~g3K%R*E5K|jmWL8B3ZOWka8Z`9B=?TfmjhJ4zv*gv*q-LPA{h>&_WZ`z#`XIS5~nW 
z)hkxM0{10fyuZEtl88aknub-X>jFYF4Kb$4$S*M;L`pSc#MlYnjGc1LwLbt%9TN$+ z9^Gq&%FLZ@DdNasSw!3E!ml3tWnoxFG>}M%;;+}mdeh04c`wV!lhk9s=GYC_@>w70 zLo19yN~eWX5Qjnd&I^0U1cXl87k%z=HLvY7hyZ08CF> zC%snZnMJ-gy(kccjD;7&%np)3$`R5CW^^So0rDgc^_&9Zk4=^`{s*JcYJ4ZqqR2L5hh70F2ggVq z00v(g|3GG3I}2nTkh5bH$%mzv<6$n3H4TMvEQwB*jw`qXGE6~80GM2WcUX*IxPq`1 z#Q*>ksagcX7RE1#O;}Pccj;O_N>({nTlTP8i-j z1h{@pas`pRWtp-gqA->$I{!|0w|1U8qLCa?uLZJAaAFSJR`2B5Y3GHNlE7pG(1uB6 zC@semu}fnYEHxBj=rI~{SmOxa1{L`j)Xj1pJtEuY-k2Zx#UoWIK)XbM)y8Py!so<3 z>pl6HAa^jG18b$-3`?$qKmEvjw6%%036c6Nw`u*P*U!S_>mXnyu1{?Et=m*he~h*$ zEW&uugK
    8)$Rg-o_+v0Oj<@2z>GuuteaU=dV#6YG{T`YbJc{PuPH_I3RBb^P{q z{PuPH_I3RBwUcjOwWruwzbwqOrf#!2HtFCgrrOHsYWWd zKH#Kzc6`>^So*kUQ}zs^r6*gqkIzNBzc&KsK5zS83z^AY@56`XMw`7gCh^PAV{Y-@ zLul@DqjkxNm2Dz(zmIaQh0W~z*5RXb!vkDTcW`SDLAv|HQ^!Nu$3xl2L)phe*~f#G z$Agu}gO$gFmB$+#jyE_QZ*Vx?;P7ZSI4t=hT8JVn1b<|aK8&(Rnv8*9QVCZX(A6l`%9Q3Zlf) zojs{-A$vLhkug`k5>AX$gbq%fsqxDRy2}W9m2rLQmyM-ySJO6@K;+c!uDsQ9w0$1g;x$b8mY~Bq)y`tbGc&fw-x@FtDO&#xylk3dBcX( znal%Ej(BJ?y}O*25#TlZt$h1&(lm z=pt5w*7HXU4%!fq*5P8*X5->Ar3l*Tms9<;&M&$+EhSq`kw@t1O4rlDmF*;RXZ!ucon)LztFk%ri_j`bj zBx1N4L|#)^LLWr)-%D^Nvp77?<&i~jX%$j*nHS_X1GUr@*M-jnuQEtRC9@hzmQ|*O zw>R2<^{d7bv;Z;coo6WZ+@dq=soO3||JPXFSBnS_U)h~A{ROy6`ltJO;P^YhBmE9g zxri_Er=_^ip19(1UVkD#dd+0KEuSfB4kn{1ls)d|>it4&+P=rY@8eop(F# zY4_xmb$aeu|HboD=j=IksC)YCIdj~;TXw=!YhEo&6#}&m?VQgpv6H56%>M>jeqmJd z`)a8;6-b*&og#g`lWx|S-ksFUSYp03f$-|*3X*ipe5Nwb{mJwh^w_~Sos!e8`5>n4 zHXEvX_UW@#18s%V@xW!D_Q2t^9Q)yHsVKD^&8#?!-iF^h;1AyB^^^V*0*MSypIs5d z7~}Kj;!FB^vq9|HCS$=xhdzF;I#1_U)iOP)k1Z6dzQ65gyV{<)&u-_zI%=)!WKTP5 zE)=V-lRb`it>!nf*G#{3M?$E!S=h9T>m0l9+=nZzd}WXHsdFEPxF##C)hcdZ-N)ab z{)Fq>n!ZqlMcHeYaHs)F+1tfJ7$KUzH1TD2A-uA)RB`N0U8dBoInpZIAxUPXTrLmW5L2NU&@hRxg8H@Dd(J*6c1oC2w5l@DUi%d2YMv(RjCpH^Q5 z0I|SfhU|D{&hg5e7pT!I!E8Z|`NW$bc(gu|JUEoc0uJ`&AMC-RyIipKx zQJO?9%KwnhqU2B4*Gv2oR&yTGvroEq_p4meH{Dk|r@)H8uoq0hkdu#`oWnFlQGs<$ zfzJ8*?=LTJ-@d%P{QmOg&Fcd*#k>ORngV8Oll50wVCG{2l9%km$St0iFHn(*Y-c?$ z|FX2$N}De>wI#TZSNK!Nth9WaEjCMSyU?0>bLbC&)UzG@zib3HUfORbyAN#Km5^?h s+;9YDG~Rd|x?35CRQbmpg~dMz)wO7!{<;k-b_-VgKkBgS*5rKv07H@;PXGV_ diff --git a/metadata-io/src/test/resources/search_config_builder_test.yml b/metadata-io/src/test/resources/search_config_builder_test.yml new file mode 100644 index 0000000000..4f3a7ce3b7 --- /dev/null +++ b/metadata-io/src/test/resources/search_config_builder_test.yml @@ -0,0 +1,74 @@ +# Used for testing more real-world configurations + +queryConfigurations: + # Criteria for exact-match only + # Contains `_`, `'`, `"` then use exact match query + - queryRegex: >- + ["'].+["']|\S+_\S+ + 
simpleQuery: false + prefixMatchQuery: true + exactMatchQuery: true + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: true + weight: 0.5 + score_mode: avg + boost_mode: multiply + + # Select * + - queryRegex: '[*]|' + simpleQuery: false + prefixMatchQuery: false + exactMatchQuery: false + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: true + weight: 0.5 + score_mode: avg + boost_mode: multiply + + # default + - queryRegex: .* + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + boolQuery: + should: + match_all: {} + must: + - term: + fieldName: '{{query_string}}' + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + score_mode: avg + boost_mode: multiply \ No newline at end of file diff --git a/metadata-io/src/test/resources/search_config_fixture_test.yml b/metadata-io/src/test/resources/search_config_fixture_test.yml new file mode 100644 index 0000000000..606ce6a2f6 --- /dev/null +++ b/metadata-io/src/test/resources/search_config_fixture_test.yml @@ -0,0 +1,51 @@ +# Use for testing with search fixtures +queryConfigurations: + # Select * + - queryRegex: '[*]|' + simpleQuery: false + prefixMatchQuery: false + exactMatchQuery: false + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: true + weight: 0.5 + score_mode: avg + boost_mode: multiply + + - queryRegex: .* + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: 
+ value: true + weight: 0.5 + - filter: + terms: + tags: + - urn:li:tag:pii + weight: 1.25 + score_mode: avg + boost_mode: multiply \ No newline at end of file diff --git a/metadata-io/src/test/resources/search_config_test.yml b/metadata-io/src/test/resources/search_config_test.yml new file mode 100644 index 0000000000..787d7f22de --- /dev/null +++ b/metadata-io/src/test/resources/search_config_test.yml @@ -0,0 +1,55 @@ +# https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html + +# First match +queryConfigurations: + # `*` or empty, select all queries + - queryRegex: '[*]|' + simpleQuery: false + prefixMatchQuery: false + exactMatchQuery: false + functionScore: + functions: + - filter: + match_all: { } + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: true + weight: 0.5 + score_mode: avg + boost_mode: multiply + + # default catch all + - queryRegex: .* + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + # {{query_string}} is the search query string + # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html + boolQuery: + must: + - term: + name: '{{query_string}}' + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: false + weight: 1.5 + score_mode: avg + boost_mode: multiply diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl index c9a0307319..c4755424a7 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl @@ -16,7 +16,8 @@ record SchemaField { @Searchable = { "fieldName": "fieldPaths", "fieldType": "TEXT", - "boostScore": 5.0 + "boostScore": 5.0, + "queryByDefault": "true" } 
fieldPath: SchemaFieldPath diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index cab210f927..0c2ab52462 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -1,8 +1,13 @@ package com.linkedin.gms.factory.search; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; @@ -20,12 +25,16 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.PropertySource; +import java.io.IOException; + @Slf4j @Configuration @PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) @Import({EntityRegistryFactory.class, SettingsBuilderFactory.class}) public class ElasticSearchServiceFactory { + private static final ObjectMapper YAML_MAPPER = new YAMLMapper(); + @Autowired @Qualifier("baseElasticSearchComponents") private BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components; @@ -43,13 +52,18 @@ 
public class ElasticSearchServiceFactory { @Bean(name = "elasticSearchService") @Nonnull - protected ElasticSearchService getInstance(ConfigurationProvider configurationProvider) { + protected ElasticSearchService getInstance(ConfigurationProvider configurationProvider) throws IOException { log.info("Search configuration: {}", configurationProvider.getElasticSearch().getSearch()); + ElasticSearchConfiguration elasticSearchConfiguration = configurationProvider.getElasticSearch(); + SearchConfiguration searchConfiguration = elasticSearchConfiguration.getSearch(); + CustomSearchConfiguration customSearchConfiguration = searchConfiguration.getCustom() == null ? null + : searchConfiguration.getCustom().customSearchConfiguration(YAML_MAPPER); + ESSearchDAO esSearchDAO = new ESSearchDAO(entityRegistry, components.getSearchClient(), components.getIndexConvention(), - configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(), - configurationProvider.getElasticSearch().getImplementation(), configurationProvider.getElasticSearch().getSearch()); + configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(), + elasticSearchConfiguration.getImplementation(), searchConfiguration, customSearchConfiguration); return new ElasticSearchService( new EntityIndexBuilders(components.getIndexBuilder(), entityRegistry, components.getIndexConvention(), settingsBuilder), esSearchDAO, diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index 42643403a8..626ec6d5ae 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -197,6 +197,9 @@ elasticsearch: partial: urnFactor: ${ELASTICSEARCH_QUERY_PARTIAL_URN_FACTOR:0.5} # multiplier on Urn token match, a partial match on Urn > non-Urn is assumed factor: ${ELASTICSEARCH_QUERY_PARTIAL_FACTOR:0.4} # multiplier on possible non-Urn token match + custom: + 
configEnabled: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED:false} + configFile: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:search_config.yml} graph: timeoutSeconds: ${ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS:50} # graph dao timeout seconds batchSize: ${ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE:1000} # graph dao batch size diff --git a/metadata-service/factories/src/main/resources/search_config.yml b/metadata-service/factories/src/main/resources/search_config.yml new file mode 100644 index 0000000000..8d65c42dd5 --- /dev/null +++ b/metadata-service/factories/src/main/resources/search_config.yml @@ -0,0 +1,71 @@ +# Notes: +# +# First match wins +# +# queryRegex = Java regex syntax +# +# functionScores - See the following for function score syntax +# https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html + +queryConfigurations: + # Select * + - queryRegex: '[*]|' + simpleQuery: false + prefixMatchQuery: false + exactMatchQuery: false + boolQuery: + must_not: + term: + deprecated: + value: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + score_mode: multiply + boost_mode: multiply + + # Criteria for exact-match only + # Contains quoted or contains underscore then use exact match query + - queryRegex: >- + ["'].+["']|\S+_\S+ + simpleQuery: false + prefixMatchQuery: true + exactMatchQuery: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + - filter: + term: + deprecated: + value: true + weight: 0 + score_mode: multiply + boost_mode: multiply + + # default + - queryRegex: .* + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + boolQuery: + must_not: + term: + deprecated: + value: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + score_mode: multiply + boost_mode: multiply diff --git 
a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index fe85f0a38e..9d42e7cf3e 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -66,7 +66,7 @@ public class ConfigSearchExport extends HttpServlet { .filter(Optional::isPresent) .forEach(entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); - SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec, searchConfiguration) + SearchRequest searchRequest = SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) .getSearchRequest("*", null, null, 0, 0, new SearchFlags() .setFulltext(true).setSkipHighlighting(true).setSkipAggregates(true));