mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-28 17:33:04 +00:00
fix(search): restore prefix phrase match on quoted search (#11444)
This commit is contained in:
parent
2ceb8e0934
commit
b17d7764e4
@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
|
|||||||
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
|
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
|
||||||
environment: &datahub-gms-dev-env
|
environment: &datahub-gms-dev-env
|
||||||
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
|
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
|
||||||
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml}
|
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
|
||||||
SKIP_ELASTICSEARCH_CHECK: false
|
SKIP_ELASTICSEARCH_CHECK: false
|
||||||
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
|
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
|
||||||
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
|
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
|
||||||
|
|||||||
@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
|
|||||||
If you want to:
|
If you want to:
|
||||||
|
|
||||||
- Exact match on term or phrase
|
- Exact match on term or phrase
|
||||||
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22)
|
- ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
|
||||||
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema)
|
- ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
|
||||||
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.
|
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.
|
||||||
|
|
||||||
- Exclude terms
|
- Exclude terms
|
||||||
|
|||||||
@ -0,0 +1,47 @@
|
|||||||
|
package com.linkedin.metadata.search.fixtures;
|
||||||
|
|
||||||
|
import static org.testng.AssertJUnit.assertEquals;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
|
||||||
|
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
|
||||||
|
private static final String DEFAULT_CONFIG = "search_config.yaml";
|
||||||
|
private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
|
||||||
|
private static final YAMLMapper MAPPER = new YAMLMapper();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure default search configuration matches the test fixture configuration (allowing for some
|
||||||
|
* differences)
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testConfig() throws IOException {
|
||||||
|
final CustomSearchConfiguration defaultConfig;
|
||||||
|
final CustomSearchConfiguration fixtureConfig;
|
||||||
|
|
||||||
|
try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) {
|
||||||
|
defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
|
||||||
|
}
|
||||||
|
try (InputStream stream = new ClassPathResource(TEST_FIXTURE_CONFIG).getInputStream()) {
|
||||||
|
fixtureConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
|
||||||
|
|
||||||
|
// test specifics
|
||||||
|
((List<Map<String, Object>>)
|
||||||
|
fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions"))
|
||||||
|
.remove(1);
|
||||||
|
|
||||||
|
((List<Map<String, Object>>)
|
||||||
|
fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions"))
|
||||||
|
.remove(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(fixtureConfig, defaultConfig);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -944,7 +944,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
"covid",
|
"covid",
|
||||||
2,
|
2,
|
||||||
"\"raw_orders\"",
|
"\"raw_orders\"",
|
||||||
6,
|
1,
|
||||||
STRUCTURED_QUERY_PREFIX + "sample",
|
STRUCTURED_QUERY_PREFIX + "sample",
|
||||||
3,
|
3,
|
||||||
STRUCTURED_QUERY_PREFIX + "\"sample\"",
|
STRUCTURED_QUERY_PREFIX + "\"sample\"",
|
||||||
@ -1327,24 +1327,24 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
totalResults += numResults;
|
totalResults += numResults;
|
||||||
scrollId = result.getScrollId();
|
scrollId = result.getScrollId();
|
||||||
} while (scrollId != null);
|
} while (scrollId != null);
|
||||||
// expect 8 total matching results
|
// expect 2 total matching results
|
||||||
assertEquals(totalResults, 8);
|
assertEquals(totalResults, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSearchAcrossMultipleEntities() {
|
public void testSearchAcrossMultipleEntities() {
|
||||||
String query = "logging_events";
|
String query = "logging events";
|
||||||
SearchResult result = search(getOperationContext(), getSearchService(), query);
|
SearchResult result = search(getOperationContext(), getSearchService(), query);
|
||||||
assertEquals((int) result.getNumEntities(), 8);
|
assertEquals((int) result.getNumEntities(), 6);
|
||||||
result =
|
result =
|
||||||
search(
|
search(
|
||||||
getOperationContext(),
|
getOperationContext(),
|
||||||
getSearchService(),
|
getSearchService(),
|
||||||
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
|
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
|
||||||
query);
|
query);
|
||||||
assertEquals((int) result.getNumEntities(), 8);
|
assertEquals((int) result.getNumEntities(), 6);
|
||||||
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
|
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
|
||||||
assertEquals((int) result.getNumEntities(), 4);
|
assertEquals((int) result.getNumEntities(), 2);
|
||||||
result =
|
result =
|
||||||
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
|
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
|
||||||
assertEquals((int) result.getNumEntities(), 4);
|
assertEquals((int) result.getNumEntities(), 4);
|
||||||
@ -1706,7 +1706,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
assertTrue(
|
assertTrue(
|
||||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||||
String.format("%s - Expected search results to include matched fields", query));
|
String.format("%s - Expected search results to include matched fields", query));
|
||||||
assertEquals(result.getEntities().size(), 8);
|
assertEquals(result.getEntities().size(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -1729,7 +1729,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
assertTrue(
|
assertTrue(
|
||||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||||
String.format("%s - Expected search results to include matched fields", query));
|
String.format("%s - Expected search results to include matched fields", query));
|
||||||
assertEquals(result.getEntities().size(), 8);
|
assertEquals(result.getEntities().size(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -1755,6 +1755,27 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
assertEquals(result.getEntities().size(), 8);
|
assertEquals(result.getEntities().size(), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testQuotedPrefixDescriptionField() {
|
||||||
|
String query = "\"Constructs the fct_users_deleted\"";
|
||||||
|
SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);
|
||||||
|
assertTrue(
|
||||||
|
result.hasEntities() && !result.getEntities().isEmpty(),
|
||||||
|
String.format("%s - Expected search results", query));
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||||
|
String.format("%s - Expected search results to include matched fields", query));
|
||||||
|
assertEquals(result.getEntities().size(), 4);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
result.getEntities().stream()
|
||||||
|
.allMatch(
|
||||||
|
e ->
|
||||||
|
e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
|
||||||
|
"%s - Expected search results to match on description field based on prefix match");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParens() {
|
public void testParens() {
|
||||||
String query = "dbt | (bigquery + covid19)";
|
String query = "dbt | (bigquery + covid19)";
|
||||||
@ -1878,7 +1899,7 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||||
String.format("%s - Expected search results to include matched fields", query));
|
String.format("%s - Expected search results to include matched fields", query));
|
||||||
|
|
||||||
assertEquals(result.getEntities().size(), 10);
|
assertEquals(result.getEntities().size(), 2);
|
||||||
assertEquals(
|
assertEquals(
|
||||||
result.getEntities().get(0).getEntity().toString(),
|
result.getEntities().get(0).getEntity().toString(),
|
||||||
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
|
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
|
||||||
@ -1937,9 +1958,9 @@ public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringCont
|
|||||||
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
|
||||||
String.format("%s - Expected search results to include matched fields", query));
|
String.format("%s - Expected search results to include matched fields", query));
|
||||||
|
|
||||||
assertTrue(
|
assertFalse(
|
||||||
result.getEntities().size() > 2,
|
result.getEntities().isEmpty(),
|
||||||
String.format("%s - Expected search results to have at least two results", query));
|
String.format("%s - Expected search results to have at least 1 result.", query));
|
||||||
assertEquals(
|
assertEquals(
|
||||||
result.getEntities().get(0).getEntity().toString(),
|
result.getEntities().get(0).getEntity().toString(),
|
||||||
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",
|
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
# Use for testing with search fixtures
|
# Use for testing with search fixtures
|
||||||
queryConfigurations:
|
queryConfigurations:
|
||||||
# Select *
|
# Select */explore all
|
||||||
|
# Attempt to rank active incidents at the top followed by enrichment factors
|
||||||
- queryRegex: '[*]|'
|
- queryRegex: '[*]|'
|
||||||
simpleQuery: false
|
simpleQuery: false
|
||||||
prefixMatchQuery: false
|
prefixMatchQuery: false
|
||||||
@ -8,44 +9,91 @@ queryConfigurations:
|
|||||||
functionScore:
|
functionScore:
|
||||||
functions:
|
functions:
|
||||||
- filter:
|
- filter:
|
||||||
match_all: {}
|
term:
|
||||||
weight: 1
|
hasActiveIncidents:
|
||||||
|
value: true
|
||||||
|
weight: 2.0
|
||||||
- filter:
|
- filter:
|
||||||
term:
|
term:
|
||||||
materialized:
|
hasDescription:
|
||||||
value: true
|
value: true
|
||||||
weight: 0.5
|
weight: 1.25
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasOwners:
|
||||||
|
value: true
|
||||||
|
weight: 1.25
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasDomain:
|
||||||
|
value: true
|
||||||
|
weight: 1.1
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasGlossaryTerms:
|
||||||
|
value: true
|
||||||
|
weight: 1.1
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasTags:
|
||||||
|
value: true
|
||||||
|
weight: 1.1
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasRowCount:
|
||||||
|
value: true
|
||||||
|
weight: 1.05
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
hasColumnCount:
|
||||||
|
value: true
|
||||||
|
weight: 1.05
|
||||||
- filter:
|
- filter:
|
||||||
term:
|
term:
|
||||||
deprecated:
|
deprecated:
|
||||||
value: true
|
value: true
|
||||||
weight: 0.5
|
weight: 0.25
|
||||||
score_mode: avg
|
score_mode: multiply
|
||||||
|
boost_mode: replace
|
||||||
|
|
||||||
|
# Criteria for exact-match only
|
||||||
|
# Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query
|
||||||
|
- queryRegex: >-
|
||||||
|
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
|
||||||
|
simpleQuery: false
|
||||||
|
prefixMatchQuery: true
|
||||||
|
exactMatchQuery: true
|
||||||
|
functionScore:
|
||||||
|
functions:
|
||||||
|
- filter:
|
||||||
|
term:
|
||||||
|
deprecated:
|
||||||
|
value: true
|
||||||
|
weight: 0.25
|
||||||
|
- filter:
|
||||||
|
terms:
|
||||||
|
tags:
|
||||||
|
- urn:li:tag:pii
|
||||||
|
weight: 1.25
|
||||||
|
score_mode: multiply
|
||||||
boost_mode: multiply
|
boost_mode: multiply
|
||||||
|
|
||||||
|
# default
|
||||||
- queryRegex: .*
|
- queryRegex: .*
|
||||||
simpleQuery: true
|
simpleQuery: true
|
||||||
prefixMatchQuery: true
|
prefixMatchQuery: true
|
||||||
exactMatchQuery: true
|
exactMatchQuery: true
|
||||||
functionScore:
|
functionScore:
|
||||||
functions:
|
functions:
|
||||||
- filter:
|
|
||||||
match_all: {}
|
|
||||||
weight: 1
|
|
||||||
- filter:
|
|
||||||
term:
|
|
||||||
materialized:
|
|
||||||
value: true
|
|
||||||
weight: 0.5
|
|
||||||
- filter:
|
- filter:
|
||||||
term:
|
term:
|
||||||
deprecated:
|
deprecated:
|
||||||
value: true
|
value: true
|
||||||
weight: 0.5
|
weight: 0.25
|
||||||
- filter:
|
- filter:
|
||||||
terms:
|
terms:
|
||||||
tags:
|
tags:
|
||||||
- urn:li:tag:pii
|
- urn:li:tag:pii
|
||||||
weight: 1.25
|
weight: 1.25
|
||||||
score_mode: avg
|
score_mode: multiply
|
||||||
boost_mode: multiply
|
boost_mode: multiply
|
||||||
|
|||||||
@ -7,10 +7,12 @@ import java.util.List;
|
|||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.EqualsAndHashCode;
|
import lombok.EqualsAndHashCode;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
import lombok.ToString;
|
||||||
|
|
||||||
@Builder(toBuilder = true)
|
@Builder(toBuilder = true)
|
||||||
@Getter
|
@Getter
|
||||||
@EqualsAndHashCode
|
@EqualsAndHashCode
|
||||||
|
@ToString
|
||||||
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
|
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
|
||||||
public class CustomSearchConfiguration {
|
public class CustomSearchConfiguration {
|
||||||
|
|
||||||
|
|||||||
@ -69,7 +69,7 @@ queryConfigurations:
|
|||||||
- queryRegex: >-
|
- queryRegex: >-
|
||||||
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
|
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
|
||||||
simpleQuery: false
|
simpleQuery: false
|
||||||
prefixMatchQuery: false
|
prefixMatchQuery: true
|
||||||
exactMatchQuery: true
|
exactMatchQuery: true
|
||||||
functionScore:
|
functionScore:
|
||||||
functions:
|
functions:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user