fix(search): restore prefix phrase match on quoted search (#11444)

This commit is contained in:
david-leifker 2024-09-20 14:07:29 -05:00 committed by GitHub
parent 2ceb8e0934
commit b17d7764e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 153 additions and 35 deletions

View File

@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
environment: &datahub-gms-dev-env
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml}
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
SKIP_ELASTICSEARCH_CHECK: false
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false

View File

@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
If you want to:
- Exact match on term or phrase
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22)
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema)
- ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
- ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.
- Exclude terms

View File

@ -0,0 +1,47 @@
package com.linkedin.metadata.search.fixtures;
import static org.testng.AssertJUnit.assertEquals;
import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;
public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
  private static final String DEFAULT_CONFIG = "search_config.yaml";
  private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
  private static final YAMLMapper MAPPER = new YAMLMapper();

  /**
   * Guards against drift between the default search configuration and the one used by the search
   * test fixtures. The fixture configuration is compared against the default after the entries the
   * fixture adds for its own purposes have been stripped.
   *
   * @throws IOException if either classpath resource cannot be opened or parsed
   */
  @Test
  public void testConfig() throws IOException {
    // Load order is default first, then fixture.
    final CustomSearchConfiguration defaultConfig = readConfig(DEFAULT_CONFIG);
    final CustomSearchConfiguration fixtureConfig = readConfig(TEST_FIXTURE_CONFIG);

    // test specifics: the second scoring function of query configurations 1 and 2 is dropped
    // from the fixture before comparing.
    dropSecondFunction(fixtureConfig, 1);
    dropSecondFunction(fixtureConfig, 2);

    assertEquals(fixtureConfig, defaultConfig);
  }

  /** Parses the named classpath YAML resource into a {@link CustomSearchConfiguration}. */
  private static CustomSearchConfiguration readConfig(String classpathLocation)
      throws IOException {
    try (InputStream yaml = new ClassPathResource(classpathLocation).getInputStream()) {
      return MAPPER.readValue(yaml, CustomSearchConfiguration.class);
    }
  }

  /**
   * Removes the entry at index 1 from the {@code functions} list of the function score belonging
   * to the query configuration at {@code queryConfigIndex}.
   */
  private static void dropSecondFunction(CustomSearchConfiguration config, int queryConfigIndex) {
    final Object functions =
        config.getQueryConfigurations().get(queryConfigIndex).getFunctionScore().get("functions");
    ((List<Map<String, Object>>) functions).remove(1);
  }
}

View File

@ -944,7 +944,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
"covid",
2,
"\"raw_orders\"",
6,
1,
STRUCTURED_QUERY_PREFIX + "sample",
3,
STRUCTURED_QUERY_PREFIX + "\"sample\"",
@ -1327,24 +1327,24 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
totalResults += numResults;
scrollId = result.getScrollId();
} while (scrollId != null);
// expect 8 total matching results
assertEquals(totalResults, 8);
// expect 2 total matching results
assertEquals(totalResults, 2);
}
@Test
public void testSearchAcrossMultipleEntities() {
String query = "logging_events";
String query = "logging events";
SearchResult result = search(getOperationContext(), getSearchService(), query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result =
search(
getOperationContext(),
getSearchService(),
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
assertEquals((int) result.getNumEntities(), 2);
result =
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
@ -1706,7 +1706,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}
@Test
@ -1729,7 +1729,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}
@Test
@ -1755,6 +1755,27 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
assertEquals(result.getEntities().size(), 8);
}
/**
 * Searches across entities with a quoted phrase and checks that exactly 4 entities are returned,
 * that every one of them reports matched fields, and that each has a matched field named {@code
 * description}.
 *
 * <p>NOTE(review): per the test name, the quoted phrase is presumably only the leading part of
 * the matching descriptions in the sample data; confirm against the fixture.
 */
@Test
public void testQuotedPrefixDescriptionField() {
  String query = "\"Constructs the fct_users_deleted\"";
  SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);

  assertTrue(
      result.hasEntities() && !result.getEntities().isEmpty(),
      String.format("%s - Expected search results", query));
  assertTrue(
      result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
      String.format("%s - Expected search results to include matched fields", query));
  assertEquals(result.getEntities().size(), 4);

  // The failure message is formatted with the query, like the assertions above; without
  // String.format a failure would print a literal "%s".
  assertTrue(
      result.getEntities().stream()
          .allMatch(
              e ->
                  e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
      String.format(
          "%s - Expected search results to match on description field based on prefix match",
          query));
}
@Test
public void testParens() {
String query = "dbt | (bigquery + covid19)";
@ -1878,7 +1899,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 10);
assertEquals(result.getEntities().size(), 2);
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
@ -1937,9 +1958,9 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertTrue(
result.getEntities().size() > 2,
String.format("%s - Expected search results to have at least two results", query));
assertFalse(
result.getEntities().isEmpty(),
String.format("%s - Expected search results to have at least 1 result.", query));
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",

View File

@ -1,6 +1,7 @@
# Use for testing with search fixtures
queryConfigurations:
# Select *
# Select */explore all
# Attempt to rank active incidents at the top followed by enrichment factors
- queryRegex: '[*]|'
simpleQuery: false
prefixMatchQuery: false
@ -8,44 +9,91 @@ queryConfigurations:
functionScore:
functions:
- filter:
match_all: {}
weight: 1
term:
hasActiveIncidents:
value: true
weight: 2.0
- filter:
term:
materialized:
hasDescription:
value: true
weight: 0.5
weight: 1.25
- filter:
term:
hasOwners:
value: true
weight: 1.25
- filter:
term:
hasDomain:
value: true
weight: 1.1
- filter:
term:
hasGlossaryTerms:
value: true
weight: 1.1
- filter:
term:
hasTags:
value: true
weight: 1.1
- filter:
term:
hasRowCount:
value: true
weight: 1.05
- filter:
term:
hasColumnCount:
value: true
weight: 1.05
- filter:
term:
deprecated:
value: true
weight: 0.5
score_mode: avg
weight: 0.25
score_mode: multiply
boost_mode: replace
# Criteria for exact-match only
# Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
term:
deprecated:
value: true
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: multiply
boost_mode: multiply
# default
- queryRegex: .*
simpleQuery: true
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
match_all: {}
weight: 1
- filter:
term:
materialized:
value: true
weight: 0.5
- filter:
term:
deprecated:
value: true
weight: 0.5
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: avg
boost_mode: multiply
score_mode: multiply
boost_mode: multiply

View File

@ -7,10 +7,12 @@ import java.util.List;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
@Builder(toBuilder = true)
@Getter
@EqualsAndHashCode
@ToString
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
public class CustomSearchConfiguration {

View File

@ -69,7 +69,7 @@ queryConfigurations:
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions: