diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index f09a81c0c8..2d7db075e6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -370,7 +370,7 @@ public class ESSearchDAO { IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = AutocompleteRequestHandler.getBuilder( - entitySpec, customSearchConfiguration, queryFilterRewriteChain); + entitySpec, customSearchConfiguration, queryFilterRewriteChain, searchConfiguration); SearchRequest req = builder.getSearchRequest( opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 294efb069a..45359285b4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -1,6 +1,5 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; import static com.linkedin.metadata.search.utils.ESAccessControlUtil.restrictUrn; import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters; @@ -8,6 +7,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; @@ -35,6 +35,7 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Pair; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.index.query.*; @@ -46,7 +47,7 @@ import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; @Slf4j public class AutocompleteRequestHandler { - private final List _defaultAutocompleteFields; + private final List _defaultAutocompleteFields; private final Map> searchableFieldTypes; private static final Map @@ -56,11 +57,13 @@ public class AutocompleteRequestHandler { private final EntitySpec entitySpec; private final QueryFilterRewriteChain queryFilterRewriteChain; + private final SearchConfiguration searchConfiguration; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { this.entitySpec = entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -69,8 +72,12 @@ public class AutocompleteRequestHandler { fieldSpecs.stream() .map(SearchableFieldSpec::getSearchableAnnotation) .filter(SearchableAnnotation::isEnableAutocomplete) - .map(SearchableAnnotation::getFieldName), - Stream.of("urn")) + .map( + searchableAnnotation -> + Pair.of( + searchableAnnotation.getFieldName(), + Double.toString(searchableAnnotation.getBoostScore()))), + Stream.of(Pair.of("urn", "1.0"))) .collect(Collectors.toList()); searchableFieldTypes = fieldSpecs.stream() @@ -87,17 +94,22 @@ public class AutocompleteRequestHandler { return set1; })); this.queryFilterRewriteChain = queryFilterRewriteChain; + this.searchConfiguration = searchConfiguration; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull SearchConfiguration searchConfiguration) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( entitySpec, k -> new AutocompleteRequestHandler( - entitySpec, customSearchConfiguration, queryFilterRewriteChain)); + entitySpec, + customSearchConfiguration, + queryFilterRewriteChain, + searchConfiguration)); } public SearchRequest getSearchRequest( @@ -169,7 +181,7 @@ public class AutocompleteRequestHandler { public BoolQueryBuilder getQuery( @Nonnull ObjectMapper objectMapper, @Nullable AutocompleteConfiguration customAutocompleteConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = @@ -189,7 +201,7 @@ public class AutocompleteRequestHandler { private Optional getAutocompleteQuery( @Nullable AutocompleteConfiguration customConfig, - List autocompleteFields, + List autocompleteFields, @Nonnull String query) { Optional result = Optional.empty(); @@ -200,33 +212,39 @@ public class AutocompleteRequestHandler { return result; } - private static BoolQueryBuilder defaultQuery( - List autocompleteFields, @Nonnull String query) { + private BoolQueryBuilder defaultQuery(List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1); // Search for exact matches with higher boost and ngram matches - MultiMatchQueryBuilder autocompleteQueryBuilder = + MultiMatchQueryBuilder multiMatchQueryBuilder = QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX); - final float urnBoost = - Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); autocompleteFields.forEach( - fieldName -> { - if ("urn".equals(fieldName)) { - autocompleteQueryBuilder.field(fieldName + ".ngram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram", urnBoost); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram", urnBoost); - } else { - autocompleteQueryBuilder.field(fieldName + ".ngram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._2gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._3gram"); - autocompleteQueryBuilder.field(fieldName + ".ngram._4gram"); + pair -> { + final String fieldName = (String) pair.getLeft(); + final float boostScore = Float.parseFloat((String) pair.getRight()); + multiMatchQueryBuilder.field(fieldName + ".ngram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._2gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._3gram"); + multiMatchQueryBuilder.field(fieldName + ".ngram._4gram"); + multiMatchQueryBuilder.field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + multiMatchQueryBuilder.field(fieldName + ".ngram", boostScore); + multiMatchQueryBuilder.field( + fieldName + ".ngram._2gram", + boostScore * (searchConfiguration.getWordGram().getTwoGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._3gram", + boostScore * (searchConfiguration.getWordGram().getThreeGramFactor())); + multiMatchQueryBuilder.field( + fieldName + ".ngram._4gram", + boostScore * (searchConfiguration.getWordGram().getFourGramFactor())); + finalQuery.should( + QueryBuilders.matchQuery(fieldName + ".keyword", query).boost(boostScore)); } - autocompleteQueryBuilder.field(fieldName + ".delimited"); finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(fieldName + ".delimited", query)); }); - finalQuery.should(autocompleteQueryBuilder); + finalQuery.should(multiMatchQueryBuilder); return finalQuery; } @@ -241,12 +259,17 @@ public class AutocompleteRequestHandler { // Check for each field name and any subfields getAutocompleteFields(field) .forEach( - fieldName -> - highlightBuilder - .field(fieldName) - .field(fieldName + ".*") - .field(fieldName + ".ngram") - .field(fieldName + ".delimited")); + pair -> { + final String fieldName = (String) pair.getLeft(); + highlightBuilder + .field(fieldName) + .field(fieldName + ".*") + .field(fieldName + ".ngram") + .field(fieldName + ".delimited"); + if (!fieldName.equalsIgnoreCase("urn")) { + highlightBuilder.field(fieldName + ".keyword"); + } + }); // set field match req false for ngram highlightBuilder.fields().stream() @@ -256,9 +279,9 @@ public class AutocompleteRequestHandler { return highlightBuilder; } - private List getAutocompleteFields(@Nullable String field) { - if (field != null && !field.isEmpty()) { - return ImmutableList.of(field); + private List getAutocompleteFields(@Nullable String field) { + if (field != null && !field.isEmpty() && !field.equalsIgnoreCase("urn")) { + return ImmutableList.of(Pair.of(field, "10.0")); } return _defaultAutocompleteFields; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index bc3c892e07..504eb5f5fc 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -283,7 +283,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont Map.of( "dataset", 13, "chart", 0, - "container", 1, + "container", 2, "dashboard", 0, "tag", 0, "mlmodel", 0); @@ -903,6 +903,26 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont }); } + @Test + public void testContainerAutoComplete_with_exactMatch_onTop() { + List.of("container") + .forEach( + query -> { + try { + AutoCompleteResults result = + autocomplete( + getOperationContext(), new ContainerType(getEntityClient()), query); + assertTrue( + result.getSuggestions().get(0).equals("container"), + String.format( + "Expected query:`%s` on top of suggestions, found %s", + query, result.getSuggestions().get(0))); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + @Test public void testGroupAutoComplete() { List.of("T", "Te", "Tes", "Test ", "Test G", "Test Gro", "Test Group ") diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 572d79ebf2..c5205906e9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,6 +5,10 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -32,14 +36,44 @@ import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.testng.annotations.Test; public class AutocompleteRequestHandlerTest { - private AutocompleteRequestHandler handler = - AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - CustomSearchConfiguration.builder().build(), - QueryFilterRewriteChain.EMPTY); + private static SearchConfiguration testQueryConfig; + private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + static { + testQueryConfig = new SearchConfiguration(); + testQueryConfig.setMaxTermBucketSize(20); + + ExactMatchConfiguration exactMatchConfiguration = new ExactMatchConfiguration(); + exactMatchConfiguration.setExclusive(false); + exactMatchConfiguration.setExactFactor(10.0f); + exactMatchConfiguration.setWithPrefix(true); + exactMatchConfiguration.setPrefixFactor(6.0f); + exactMatchConfiguration.setCaseSensitivityFactor(0.7f); + exactMatchConfiguration.setEnableStructured(true); + + WordGramConfiguration wordGramConfiguration = new WordGramConfiguration(); + wordGramConfiguration.setTwoGramFactor(1.2f); + wordGramConfiguration.setThreeGramFactor(1.5f); + wordGramConfiguration.setFourGramFactor(1.8f); + + PartialConfiguration partialConfiguration = new PartialConfiguration(); + partialConfiguration.setFactor(0.4f); + partialConfiguration.setUrnFactor(0.7f); + + testQueryConfig.setExactMatch(exactMatchConfiguration); + testQueryConfig.setWordGram(wordGramConfiguration); + testQueryConfig.setPartial(partialConfiguration); + + handler = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY, + testQueryConfig); + } + private static final QueryConfiguration TEST_QUERY_CONFIG = QueryConfiguration.builder() .queryRegex(".*") @@ -88,9 +122,12 @@ public class AutocompleteRequestHandlerTest { BoolQueryBuilder wrapper = (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 3); + assertEquals(query.should().size(), 4); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("keyPart1.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(3); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("keyPart1.ngram")); assertTrue(queryFields.containsKey("keyPart1.ngram._2gram")); @@ -99,7 +136,7 @@ public class AutocompleteRequestHandlerTest { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("keyPart1.delimited", prefixQuery.fieldName()); assertEquals(wrapper.mustNot().size(), 1); @@ -108,15 +145,16 @@ public class AutocompleteRequestHandlerTest { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 8); + assertEquals(highlightedFields.size(), 9); assertEquals(highlightedFields.get(0).name(), "keyPart1"); assertEquals(highlightedFields.get(1).name(), "keyPart1.*"); assertEquals(highlightedFields.get(2).name(), "keyPart1.ngram"); assertEquals(highlightedFields.get(3).name(), "keyPart1.delimited"); - assertEquals(highlightedFields.get(4).name(), "urn"); - assertEquals(highlightedFields.get(5).name(), "urn.*"); - assertEquals(highlightedFields.get(6).name(), "urn.ngram"); - assertEquals(highlightedFields.get(7).name(), "urn.delimited"); + assertEquals(highlightedFields.get(4).name(), "keyPart1.keyword"); + assertEquals(highlightedFields.get(5).name(), "urn"); + assertEquals(highlightedFields.get(6).name(), "urn.*"); + assertEquals(highlightedFields.get(7).name(), "urn.ngram"); + assertEquals(highlightedFields.get(8).name(), "urn.delimited"); } @Test @@ -130,9 +168,12 @@ public class AutocompleteRequestHandlerTest { (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); assertEquals(wrapper.should().size(), 1); BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); - assertEquals(query.should().size(), 2); + assertEquals(query.should().size(), 3); - MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(1); + MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) query.should().get(0); + assertEquals("field.keyword", matchQueryBuilder.fieldName()); + + MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); Map queryFields = autocompleteQuery.fields(); assertTrue(queryFields.containsKey("field.ngram")); assertTrue(queryFields.containsKey("field.ngram._2gram")); @@ -141,7 +182,7 @@ public class AutocompleteRequestHandlerTest { assertEquals(autocompleteQuery.type(), MultiMatchQueryBuilder.Type.BOOL_PREFIX); MatchPhrasePrefixQueryBuilder prefixQuery = - (MatchPhrasePrefixQueryBuilder) query.should().get(0); + (MatchPhrasePrefixQueryBuilder) query.should().get(1); assertEquals("field.delimited", prefixQuery.fieldName()); MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0); @@ -149,11 +190,12 @@ public class AutocompleteRequestHandlerTest { assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List highlightedFields = highlightBuilder.fields(); - assertEquals(highlightedFields.size(), 4); + assertEquals(highlightedFields.size(), 5); assertEquals(highlightedFields.get(0).name(), "field"); assertEquals(highlightedFields.get(1).name(), "field.*"); assertEquals(highlightedFields.get(2).name(), "field.ngram"); assertEquals(highlightedFields.get(3).name(), "field.delimited"); + assertEquals(highlightedFields.get(4).name(), "field.keyword"); } @Test @@ -174,7 +216,8 @@ public class AutocompleteRequestHandlerTest { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -200,7 +243,8 @@ public class AutocompleteRequestHandlerTest { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -215,7 +259,7 @@ public class AutocompleteRequestHandlerTest { BoolQueryBuilder defaultQuery = (BoolQueryBuilder) shouldQueries.stream().filter(qb -> qb instanceof BoolQueryBuilder).findFirst().get(); - assertEquals(defaultQuery.should().size(), 3); + assertEquals(defaultQuery.should().size(), 4); // Custom customQuery = @@ -243,7 +287,8 @@ public class AutocompleteRequestHandlerTest { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -282,7 +327,8 @@ public class AutocompleteRequestHandlerTest { .build()) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -345,7 +391,8 @@ public class AutocompleteRequestHandlerTest { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -398,7 +445,8 @@ public class AutocompleteRequestHandlerTest { "deprecated", Map.of("value", false))))))) .build())) .build(), - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + testQueryConfig); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -411,7 +459,7 @@ public class AutocompleteRequestHandlerTest { assertEquals(customQuery, QueryBuilders.matchAllQuery()); // standard query still present - assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 3); + assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 4); // custom functions included assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); diff --git a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz index 2fa49c810a..bd36747255 100644 Binary files a/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz and b/metadata-io/src/test/resources/elasticsearch/sample_data/containerindex_v2.json.gz differ