fix(search): restore prefix phrase match on quoted search (#11444)

This commit is contained in:
david-leifker 2024-09-20 14:07:29 -05:00 committed by GitHub
parent 2ceb8e0934
commit b17d7764e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 153 additions and 35 deletions

View File

@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env} - ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
environment: &datahub-gms-dev-env environment: &datahub-gms-dev-env
<<: [*datahub-dev-telemetry-env, *datahub-gms-env] <<: [*datahub-dev-telemetry-env, *datahub-gms-env]
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml} ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
SKIP_ELASTICSEARCH_CHECK: false SKIP_ELASTICSEARCH_CHECK: false
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false

View File

@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
If you want to: If you want to:
- Exact match on term or phrase - Exact match on term or phrase
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22) - ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema) - ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization. - Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.
- Exclude terms - Exclude terms

View File

@ -0,0 +1,47 @@
package com.linkedin.metadata.search.fixtures;
import static org.testng.AssertJUnit.assertEquals;
import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;
/**
 * Compares the default search configuration file against the configuration file used by the
 * search fixture tests so that the two do not silently diverge.
 */
public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
  private static final String DEFAULT_CONFIG = "search_config.yaml";
  private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
  private static final YAMLMapper MAPPER = new YAMLMapper();

  /**
   * Ensure default search configuration matches the test fixture configuration, once the
   * test-specific scoring functions have been stripped from the fixture.
   *
   * @throws IOException if either classpath resource cannot be opened or parsed
   */
  @Test
  public void testConfig() throws IOException {
    final CustomSearchConfiguration defaultConfig = loadConfig(DEFAULT_CONFIG);
    final CustomSearchConfiguration fixtureConfig = loadConfig(TEST_FIXTURE_CONFIG);

    // test specifics: the second function of query configurations 1 and 2 is dropped from the
    // fixture before comparing
    removeScoreFunction(fixtureConfig, 1, 1);
    removeScoreFunction(fixtureConfig, 2, 1);

    assertEquals(fixtureConfig, defaultConfig);
  }

  /** Reads the named classpath resource and maps its YAML content to a configuration object. */
  private static CustomSearchConfiguration loadConfig(String resourceName) throws IOException {
    try (InputStream yaml = new ClassPathResource(resourceName).getInputStream()) {
      return MAPPER.readValue(yaml, CustomSearchConfiguration.class);
    }
  }

  /**
   * Removes the entry at {@code functionIndex} from the {@code functions} list of the function
   * score belonging to the query configuration at {@code queryIndex}.
   */
  private static void removeScoreFunction(
      CustomSearchConfiguration config, int queryIndex, int functionIndex) {
    // The function score is an untyped map; "functions" is cast to a list of maps as in the YAML.
    @SuppressWarnings("unchecked")
    final List<Map<String, Object>> functions =
        (List<Map<String, Object>>)
            config.getQueryConfigurations().get(queryIndex).getFunctionScore().get("functions");
    functions.remove(functionIndex);
  }
}

View File

@ -944,7 +944,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
"covid", "covid",
2, 2,
"\"raw_orders\"", "\"raw_orders\"",
6, 1,
STRUCTURED_QUERY_PREFIX + "sample", STRUCTURED_QUERY_PREFIX + "sample",
3, 3,
STRUCTURED_QUERY_PREFIX + "\"sample\"", STRUCTURED_QUERY_PREFIX + "\"sample\"",
@ -1327,24 +1327,24 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
totalResults += numResults; totalResults += numResults;
scrollId = result.getScrollId(); scrollId = result.getScrollId();
} while (scrollId != null); } while (scrollId != null);
// expect 8 total matching results // expect 2 total matching results
assertEquals(totalResults, 8); assertEquals(totalResults, 2);
} }
@Test @Test
public void testSearchAcrossMultipleEntities() { public void testSearchAcrossMultipleEntities() {
String query = "logging_events"; String query = "logging events";
SearchResult result = search(getOperationContext(), getSearchService(), query); SearchResult result = search(getOperationContext(), getSearchService(), query);
assertEquals((int) result.getNumEntities(), 8); assertEquals((int) result.getNumEntities(), 6);
result = result =
search( search(
getOperationContext(), getOperationContext(),
getSearchService(), getSearchService(),
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME), List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
query); query);
assertEquals((int) result.getNumEntities(), 8); assertEquals((int) result.getNumEntities(), 6);
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query); result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4); assertEquals((int) result.getNumEntities(), 2);
result = result =
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query); search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4); assertEquals((int) result.getNumEntities(), 4);
@ -1706,7 +1706,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertTrue( assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query)); String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8); assertEquals(result.getEntities().size(), 2);
} }
@Test @Test
@ -1729,7 +1729,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertTrue( assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query)); String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8); assertEquals(result.getEntities().size(), 2);
} }
@Test @Test
@ -1755,6 +1755,27 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertEquals(result.getEntities().size(), 8); assertEquals(result.getEntities().size(), 8);
} }
/**
 * Searches across entities for a double-quoted phrase and expects exactly 4 results, each of
 * which reports matched fields that include {@code description}.
 */
@Test
public void testQuotedPrefixDescriptionField() {
  String query = "\"Constructs the fct_users_deleted\"";
  SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);

  assertTrue(
      result.hasEntities() && !result.getEntities().isEmpty(),
      String.format("%s - Expected search results", query));
  assertTrue(
      result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
      String.format("%s - Expected search results to include matched fields", query));
  assertEquals(result.getEntities().size(), 4);

  // The failure message is formatted so the offending query is reported instead of a raw "%s".
  assertTrue(
      result.getEntities().stream()
          .allMatch(
              e ->
                  e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
      String.format(
          "%s - Expected search results to match on description field based on prefix match",
          query));
}
@Test @Test
public void testParens() { public void testParens() {
String query = "dbt | (bigquery + covid19)"; String query = "dbt | (bigquery + covid19)";
@ -1878,7 +1899,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query)); String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 10); assertEquals(result.getEntities().size(), 2);
assertEquals( assertEquals(
result.getEntities().get(0).getEntity().toString(), result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
@ -1937,9 +1958,9 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()), result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query)); String.format("%s - Expected search results to include matched fields", query));
assertTrue( assertFalse(
result.getEntities().size() > 2, result.getEntities().isEmpty(),
String.format("%s - Expected search results to have at least two results", query)); String.format("%s - Expected search results to have at least 1 result.", query));
assertEquals( assertEquals(
result.getEntities().get(0).getEntity().toString(), result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)", "urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",

View File

@ -1,6 +1,7 @@
# Use for testing with search fixtures # Use for testing with search fixtures
queryConfigurations: queryConfigurations:
# Select * # Select */explore all
# Attempt to rank active incidents at the top followed by enrichment factors
- queryRegex: '[*]|' - queryRegex: '[*]|'
simpleQuery: false simpleQuery: false
prefixMatchQuery: false prefixMatchQuery: false
@ -8,44 +9,91 @@ queryConfigurations:
functionScore: functionScore:
functions: functions:
- filter: - filter:
match_all: {} term:
weight: 1 hasActiveIncidents:
value: true
weight: 2.0
- filter: - filter:
term: term:
materialized: hasDescription:
value: true value: true
weight: 0.5 weight: 1.25
- filter:
term:
hasOwners:
value: true
weight: 1.25
- filter:
term:
hasDomain:
value: true
weight: 1.1
- filter:
term:
hasGlossaryTerms:
value: true
weight: 1.1
- filter:
term:
hasTags:
value: true
weight: 1.1
- filter:
term:
hasRowCount:
value: true
weight: 1.05
- filter:
term:
hasColumnCount:
value: true
weight: 1.05
- filter: - filter:
term: term:
deprecated: deprecated:
value: true value: true
weight: 0.5 weight: 0.25
score_mode: avg score_mode: multiply
boost_mode: replace
# Criteria for exact-match only
# Contains quotes, or is a single term with `_`, `.`, or `-` (normally considered for tokenization); then use exact match query
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
term:
deprecated:
value: true
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: multiply
boost_mode: multiply boost_mode: multiply
# default
- queryRegex: .* - queryRegex: .*
simpleQuery: true simpleQuery: true
prefixMatchQuery: true prefixMatchQuery: true
exactMatchQuery: true exactMatchQuery: true
functionScore: functionScore:
functions: functions:
- filter:
match_all: {}
weight: 1
- filter:
term:
materialized:
value: true
weight: 0.5
- filter: - filter:
term: term:
deprecated: deprecated:
value: true value: true
weight: 0.5 weight: 0.25
- filter: - filter:
terms: terms:
tags: tags:
- urn:li:tag:pii - urn:li:tag:pii
weight: 1.25 weight: 1.25
score_mode: avg score_mode: multiply
boost_mode: multiply boost_mode: multiply

View File

@ -7,10 +7,12 @@ import java.util.List;
import lombok.Builder; import lombok.Builder;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import lombok.Getter; import lombok.Getter;
import lombok.ToString;
@Builder(toBuilder = true) @Builder(toBuilder = true)
@Getter @Getter
@EqualsAndHashCode @EqualsAndHashCode
@ToString
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class) @JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
public class CustomSearchConfiguration { public class CustomSearchConfiguration {

View File

@ -69,7 +69,7 @@ queryConfigurations:
- queryRegex: >- - queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$ ^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false simpleQuery: false
prefixMatchQuery: false prefixMatchQuery: true
exactMatchQuery: true exactMatchQuery: true
functionScore: functionScore:
functions: functions: