mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-27 08:54:32 +00:00
fix(search): restore prefix phrase match on quoted search (#11444)
This commit is contained in:
parent
2ceb8e0934
commit
b17d7764e4
@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
|
||||
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
|
||||
environment: &datahub-gms-dev-env
|
||||
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
|
||||
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml}
|
||||
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
|
||||
SKIP_ELASTICSEARCH_CHECK: false
|
||||
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
|
||||
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
|
||||
|
||||
@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
|
||||
If you want to:
|
||||
|
||||
- Exact match on term or phrase
|
||||
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22)
|
||||
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema)
|
||||
- ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
|
||||
- ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
|
||||
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.
|
||||
|
||||
- Exclude terms
|
||||
|
||||
@ -0,0 +1,47 @@
|
||||
package com.linkedin.metadata.search.fixtures;
|
||||
|
||||
import static org.testng.AssertJUnit.assertEquals;
|
||||
|
||||
import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
|
||||
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
|
||||
private static final String DEFAULT_CONFIG = "search_config.yaml";
|
||||
private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
|
||||
private static final YAMLMapper MAPPER = new YAMLMapper();
|
||||
|
||||
/**
|
||||
* Ensure default search configuration matches the test fixture configuration (allowing for some
|
||||
* differences)
|
||||
*/
|
||||
@Test
|
||||
public void testConfig() throws IOException {
|
||||
final CustomSearchConfiguration defaultConfig;
|
||||
final CustomSearchConfiguration fixtureConfig;
|
||||
|
||||
try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) {
|
||||
defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
|
||||
}
|
||||
try (InputStream stream = new ClassPathResource(TEST_FIXTURE_CONFIG).getInputStream()) {
|
||||
fixtureConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
|
||||
|
||||
// test specifics
|
||||
((List<Map<String, Object>>)
|
||||
fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions"))
|
||||
.remove(1);
|
||||
|
||||
((List<Map<String, Object>>)
|
||||
fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions"))
|
||||
.remove(1);
|
||||
}
|
||||
|
||||
assertEquals(fixtureConfig, defaultConfig);
|
||||
}
|
||||
}
|
||||
@ -944,7 +944,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
"covid",
|
||||
2,
|
||||
"\"raw_orders\"",
|
||||
6,
|
||||
1,
|
||||
STRUCTURED_QUERY_PREFIX + "sample",
|
||||
3,
|
||||
STRUCTURED_QUERY_PREFIX + "\"sample\"",
|
||||
@ -1327,24 +1327,24 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
totalResults += numResults;
|
||||
scrollId = result.getScrollId();
|
||||
} while (scrollId != null);
|
||||
// expect 8 total matching results
|
||||
assertEquals(totalResults, 8);
|
||||
// expect 2 total matching results
|
||||
assertEquals(totalResults, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSearchAcrossMultipleEntities() {
|
||||
String query = "logging_events";
|
||||
String query = "logging events";
|
||||
SearchResult result = search(getOperationContext(), getSearchService(), query);
|
||||
assertEquals((int) result.getNumEntities(), 8);
|
||||
assertEquals((int) result.getNumEntities(), 6);
|
||||
result =
|
||||
search(
|
||||
getOperationContext(),
|
||||
getSearchService(),
|
||||
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
|
||||
query);
|
||||
assertEquals((int) result.getNumEntities(), 8);
|
||||
assertEquals((int) result.getNumEntities(), 6);
|
||||
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
|
||||
assertEquals((int) result.getNumEntities(), 4);
|
||||
assertEquals((int) result.getNumEntities(), 2);
|
||||
result =
|
||||
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
|
||||
assertEquals((int) result.getNumEntities(), 4);
|
||||
@ -1706,7 +1706,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
assertTrue(
|
||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||
String.format("%s - Expected search results to include matched fields", query));
|
||||
assertEquals(result.getEntities().size(), 8);
|
||||
assertEquals(result.getEntities().size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -1729,7 +1729,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
assertTrue(
|
||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||
String.format("%s - Expected search results to include matched fields", query));
|
||||
assertEquals(result.getEntities().size(), 8);
|
||||
assertEquals(result.getEntities().size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -1755,6 +1755,27 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
assertEquals(result.getEntities().size(), 8);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuotedPrefixDescriptionField() {
|
||||
String query = "\"Constructs the fct_users_deleted\"";
|
||||
SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);
|
||||
assertTrue(
|
||||
result.hasEntities() && !result.getEntities().isEmpty(),
|
||||
String.format("%s - Expected search results", query));
|
||||
|
||||
assertTrue(
|
||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||
String.format("%s - Expected search results to include matched fields", query));
|
||||
assertEquals(result.getEntities().size(), 4);
|
||||
|
||||
assertTrue(
|
||||
result.getEntities().stream()
|
||||
.allMatch(
|
||||
e ->
|
||||
e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
|
||||
"%s - Expected search results to match on description field based on prefix match");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParens() {
|
||||
String query = "dbt | (bigquery + covid19)";
|
||||
@ -1878,7 +1899,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||
String.format("%s - Expected search results to include matched fields", query));
|
||||
|
||||
assertEquals(result.getEntities().size(), 10);
|
||||
assertEquals(result.getEntities().size(), 2);
|
||||
assertEquals(
|
||||
result.getEntities().get(0).getEntity().toString(),
|
||||
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
|
||||
@ -1937,9 +1958,9 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||
String.format("%s - Expected search results to include matched fields", query));
|
||||
|
||||
assertTrue(
|
||||
result.getEntities().size() > 2,
|
||||
String.format("%s - Expected search results to have at least two results", query));
|
||||
assertFalse(
|
||||
result.getEntities().isEmpty(),
|
||||
String.format("%s - Expected search results to have at least 1 result.", query));
|
||||
assertEquals(
|
||||
result.getEntities().get(0).getEntity().toString(),
|
||||
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
# Use for testing with search fixtures
|
||||
queryConfigurations:
|
||||
# Select *
|
||||
# Select */explore all
|
||||
# Attempt to rank active incidents at the top followed by enrichment factors
|
||||
- queryRegex: '[*]|'
|
||||
simpleQuery: false
|
||||
prefixMatchQuery: false
|
||||
@ -8,44 +9,91 @@ queryConfigurations:
|
||||
functionScore:
|
||||
functions:
|
||||
- filter:
|
||||
match_all: {}
|
||||
weight: 1
|
||||
term:
|
||||
hasActiveIncidents:
|
||||
value: true
|
||||
weight: 2.0
|
||||
- filter:
|
||||
term:
|
||||
materialized:
|
||||
hasDescription:
|
||||
value: true
|
||||
weight: 0.5
|
||||
weight: 1.25
|
||||
- filter:
|
||||
term:
|
||||
hasOwners:
|
||||
value: true
|
||||
weight: 1.25
|
||||
- filter:
|
||||
term:
|
||||
hasDomain:
|
||||
value: true
|
||||
weight: 1.1
|
||||
- filter:
|
||||
term:
|
||||
hasGlossaryTerms:
|
||||
value: true
|
||||
weight: 1.1
|
||||
- filter:
|
||||
term:
|
||||
hasTags:
|
||||
value: true
|
||||
weight: 1.1
|
||||
- filter:
|
||||
term:
|
||||
hasRowCount:
|
||||
value: true
|
||||
weight: 1.05
|
||||
- filter:
|
||||
term:
|
||||
hasColumnCount:
|
||||
value: true
|
||||
weight: 1.05
|
||||
- filter:
|
||||
term:
|
||||
deprecated:
|
||||
value: true
|
||||
weight: 0.5
|
||||
score_mode: avg
|
||||
weight: 0.25
|
||||
score_mode: multiply
|
||||
boost_mode: replace
|
||||
|
||||
# Criteria for exact-match only
|
||||
# Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query
|
||||
- queryRegex: >-
|
||||
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
|
||||
simpleQuery: false
|
||||
prefixMatchQuery: true
|
||||
exactMatchQuery: true
|
||||
functionScore:
|
||||
functions:
|
||||
- filter:
|
||||
term:
|
||||
deprecated:
|
||||
value: true
|
||||
weight: 0.25
|
||||
- filter:
|
||||
terms:
|
||||
tags:
|
||||
- urn:li:tag:pii
|
||||
weight: 1.25
|
||||
score_mode: multiply
|
||||
boost_mode: multiply
|
||||
|
||||
# default
|
||||
- queryRegex: .*
|
||||
simpleQuery: true
|
||||
prefixMatchQuery: true
|
||||
exactMatchQuery: true
|
||||
functionScore:
|
||||
functions:
|
||||
- filter:
|
||||
match_all: {}
|
||||
weight: 1
|
||||
- filter:
|
||||
term:
|
||||
materialized:
|
||||
value: true
|
||||
weight: 0.5
|
||||
- filter:
|
||||
term:
|
||||
deprecated:
|
||||
value: true
|
||||
weight: 0.5
|
||||
weight: 0.25
|
||||
- filter:
|
||||
terms:
|
||||
tags:
|
||||
- urn:li:tag:pii
|
||||
weight: 1.25
|
||||
score_mode: avg
|
||||
boost_mode: multiply
|
||||
score_mode: multiply
|
||||
boost_mode: multiply
|
||||
|
||||
@ -7,10 +7,12 @@ import java.util.List;
|
||||
import lombok.Builder;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
|
||||
@Builder(toBuilder = true)
|
||||
@Getter
|
||||
@EqualsAndHashCode
|
||||
@ToString
|
||||
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
|
||||
public class CustomSearchConfiguration {
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ queryConfigurations:
|
||||
- queryRegex: >-
|
||||
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
|
||||
simpleQuery: false
|
||||
prefixMatchQuery: false
|
||||
prefixMatchQuery: true
|
||||
exactMatchQuery: true
|
||||
functionScore:
|
||||
functions:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user