mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-03 12:16:10 +00:00
feat(browsepathv2): enable incremental update browsepath (#8354)
This commit is contained in:
parent
393b5a3039
commit
a5f420ffd4
@ -437,7 +437,7 @@ public class ESBrowseDAO {
|
||||
|
||||
queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal));
|
||||
|
||||
queryBuilder.must(SearchRequestHandler.getFilterQuery(filter));
|
||||
queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter));
|
||||
|
||||
return queryBuilder;
|
||||
}
|
||||
|
||||
@ -191,7 +191,7 @@ public class SearchRequestHandler {
|
||||
BoolQueryBuilder filterQuery = getFilterQuery(filter);
|
||||
searchSourceBuilder.query(QueryBuilders.boolQuery()
|
||||
.must(getQuery(input, finalSearchFlags.isFulltext()))
|
||||
.must(filterQuery));
|
||||
.filter(filterQuery));
|
||||
if (!finalSearchFlags.isSkipAggregates()) {
|
||||
_aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation);
|
||||
}
|
||||
@ -231,7 +231,7 @@ public class SearchRequestHandler {
|
||||
searchSourceBuilder.fetchSource("urn", null);
|
||||
|
||||
BoolQueryBuilder filterQuery = getFilterQuery(filter);
|
||||
searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).must(filterQuery));
|
||||
searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery));
|
||||
_aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation);
|
||||
searchSourceBuilder.highlighter(getHighlights());
|
||||
ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion);
|
||||
|
||||
@ -109,10 +109,11 @@ public class ESUtils {
|
||||
boolean isTimeseries) {
|
||||
final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder();
|
||||
conjunctiveCriterion.getAnd().forEach(criterion -> {
|
||||
if (!criterion.getValue().trim().isEmpty() || criterion.hasValues()
|
||||
|| criterion.getCondition() == Condition.IS_NULL) {
|
||||
if (Set.of(Condition.EXISTS, Condition.IS_NULL).contains(criterion.getCondition())
|
||||
|| !criterion.getValue().trim().isEmpty() || criterion.hasValues()) {
|
||||
if (!criterion.isNegated()) {
|
||||
andQueryBuilder.must(getQueryBuilderFromCriterion(criterion, isTimeseries));
|
||||
// `filter` instead of `must` (enables caching and bypasses scoring)
|
||||
andQueryBuilder.filter(getQueryBuilderFromCriterion(criterion, isTimeseries));
|
||||
} else {
|
||||
andQueryBuilder.mustNot(getQueryBuilderFromCriterion(criterion, isTimeseries));
|
||||
}
|
||||
@ -261,8 +262,12 @@ public class ESUtils {
|
||||
Criterion criterionToQuery = new Criterion();
|
||||
criterionToQuery.setCondition(criterion.getCondition());
|
||||
criterionToQuery.setNegated(criterion.isNegated());
|
||||
criterionToQuery.setValue(criterion.getValue());
|
||||
criterionToQuery.setValues(criterion.getValues());
|
||||
if (criterion.hasValues()) {
|
||||
criterionToQuery.setValues(criterion.getValues());
|
||||
}
|
||||
if (criterion.hasValue()) {
|
||||
criterionToQuery.setValue(criterion.getValue());
|
||||
}
|
||||
criterionToQuery.setField(toKeywordField(field, isTimeseries));
|
||||
orQueryBuilder.should(getQueryBuilderFromCriterionForSingleField(criterionToQuery, isTimeseries));
|
||||
}
|
||||
@ -274,30 +279,32 @@ public class ESUtils {
|
||||
final Condition condition = criterion.getCondition();
|
||||
final String fieldName = toFacetField(criterion.getField());
|
||||
|
||||
if (condition == Condition.EQUAL) {
|
||||
return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries);
|
||||
} else if (condition == Condition.IS_NULL) {
|
||||
if (condition == Condition.IS_NULL) {
|
||||
return QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(criterion.getField())).queryName(fieldName);
|
||||
} else if (condition == Condition.EXISTS) {
|
||||
return QueryBuilders.boolQuery().must(QueryBuilders.existsQuery(criterion.getField())).queryName(fieldName);
|
||||
// TODO: Support multi-match on the following operators (using new 'values' field)
|
||||
} else if (condition == Condition.GREATER_THAN) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).gt(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).gte(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.LESS_THAN) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).lt(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).lte(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.CONTAIN) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
|
||||
} else if (condition == Condition.START_WITH) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
|
||||
} else if (condition == Condition.END_WITH) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())).queryName(fieldName);
|
||||
} else if (criterion.hasValues() || criterion.hasValue()) {
|
||||
if (condition == Condition.EQUAL) {
|
||||
return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries);
|
||||
// TODO: Support multi-match on the following operators (using new 'values' field)
|
||||
} else if (condition == Condition.GREATER_THAN) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).gt(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).gte(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.LESS_THAN) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).lt(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) {
|
||||
return QueryBuilders.rangeQuery(criterion.getField()).lte(criterion.getValue().trim()).queryName(fieldName);
|
||||
} else if (condition == Condition.CONTAIN) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
|
||||
} else if (condition == Condition.START_WITH) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
|
||||
} else if (condition == Condition.END_WITH) {
|
||||
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
|
||||
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())).queryName(fieldName);
|
||||
}
|
||||
}
|
||||
throw new UnsupportedOperationException("Unsupported condition: " + condition);
|
||||
}
|
||||
|
||||
@ -29,6 +29,7 @@ import com.linkedin.metadata.query.filter.CriterionArray;
|
||||
import com.linkedin.metadata.query.filter.Filter;
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.index.query.BoolQueryBuilder;
|
||||
import org.elasticsearch.index.query.ExistsQueryBuilder;
|
||||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
||||
import org.elasticsearch.index.query.TermsQueryBuilder;
|
||||
@ -215,7 +216,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
}
|
||||
|
||||
private void testFilterQuery(BoolQueryBuilder testQuery) {
|
||||
Optional<MatchQueryBuilder> mustNotHaveRemovedCondition = testQuery.must()
|
||||
Optional<MatchQueryBuilder> mustNotHaveRemovedCondition = testQuery.filter()
|
||||
.stream()
|
||||
.filter(or -> or instanceof BoolQueryBuilder)
|
||||
.map(or -> (BoolQueryBuilder) or)
|
||||
@ -292,13 +293,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
|
||||
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
|
||||
|
||||
// bool -> must -> [bool] -> should -> [bool] -> must -> [bool] -> should -> [terms]
|
||||
List<TermsQueryBuilder> termsQueryBuilders = testQuery.must()
|
||||
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [terms]
|
||||
List<TermsQueryBuilder> termsQueryBuilders = testQuery.filter()
|
||||
.stream()
|
||||
.filter(or -> or instanceof BoolQueryBuilder)
|
||||
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
|
||||
.filter(should -> should instanceof BoolQueryBuilder)
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
|
||||
.filter(must -> must instanceof BoolQueryBuilder)
|
||||
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
|
||||
.filter(should -> should instanceof TermsQueryBuilder)
|
||||
@ -342,12 +343,12 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
|
||||
|
||||
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [bool] -> should -> [match]
|
||||
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.must()
|
||||
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.filter()
|
||||
.stream()
|
||||
.filter(or -> or instanceof BoolQueryBuilder)
|
||||
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
|
||||
.filter(should -> should instanceof BoolQueryBuilder)
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
|
||||
.filter(must -> must instanceof BoolQueryBuilder)
|
||||
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
|
||||
.filter(should -> should instanceof BoolQueryBuilder)
|
||||
@ -381,13 +382,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
|
||||
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
|
||||
|
||||
// bool -> must -> [bool] -> should -> [bool] -> must -> [bool] -> should -> [match]
|
||||
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.must()
|
||||
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [match]
|
||||
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.filter()
|
||||
.stream()
|
||||
.filter(or -> or instanceof BoolQueryBuilder)
|
||||
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
|
||||
.filter(should -> should instanceof BoolQueryBuilder)
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
|
||||
.filter(must -> must instanceof BoolQueryBuilder)
|
||||
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
|
||||
.filter(should -> should instanceof MultiMatchQueryBuilder)
|
||||
@ -414,13 +415,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
|
||||
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
|
||||
|
||||
// bool -> must -> [bool] -> should -> [bool] -> must -> [terms]
|
||||
List<TermsQueryBuilder> termsQueryBuilders = testQuery.must()
|
||||
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [terms]
|
||||
List<TermsQueryBuilder> termsQueryBuilders = testQuery.filter()
|
||||
.stream()
|
||||
.filter(must -> must instanceof BoolQueryBuilder)
|
||||
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
|
||||
.filter(should -> should instanceof BoolQueryBuilder)
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
|
||||
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
|
||||
.filter(must -> must instanceof TermsQueryBuilder)
|
||||
.map(must -> (TermsQueryBuilder) must)
|
||||
.collect(Collectors.toList());
|
||||
@ -439,6 +440,44 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
|
||||
assertTrue(values.contains("bigquery"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBrowsePathQueryFilter() {
|
||||
// Condition: has `browsePaths` AND does NOT have `browsePathV2`
|
||||
Criterion missingBrowsePathV2 = new Criterion();
|
||||
missingBrowsePathV2.setCondition(Condition.IS_NULL);
|
||||
missingBrowsePathV2.setField("browsePathV2");
|
||||
// Excludes entities without browsePaths
|
||||
Criterion hasBrowsePathV1 = new Criterion();
|
||||
hasBrowsePathV1.setCondition(Condition.EXISTS);
|
||||
hasBrowsePathV1.setField("browsePaths");
|
||||
|
||||
CriterionArray criterionArray = new CriterionArray();
|
||||
criterionArray.add(missingBrowsePathV2);
|
||||
criterionArray.add(hasBrowsePathV1);
|
||||
|
||||
ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
|
||||
conjunctiveCriterion.setAnd(criterionArray);
|
||||
|
||||
ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
|
||||
conjunctiveCriterionArray.add(conjunctiveCriterion);
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setOr(conjunctiveCriterionArray);
|
||||
|
||||
BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter);
|
||||
|
||||
assertEquals(test.should().size(), 1);
|
||||
|
||||
BoolQueryBuilder shouldQuery = (BoolQueryBuilder) test.should().get(0);
|
||||
assertEquals(shouldQuery.filter().size(), 2);
|
||||
|
||||
BoolQueryBuilder mustNotHaveV2 = (BoolQueryBuilder) shouldQuery.filter().get(0);
|
||||
assertEquals(((ExistsQueryBuilder) mustNotHaveV2.mustNot().get(0)).fieldName(), "browsePathV2");
|
||||
|
||||
BoolQueryBuilder mustHaveV1 = (BoolQueryBuilder) shouldQuery.filter().get(1);
|
||||
assertEquals(((ExistsQueryBuilder) mustHaveV1.must().get(0)).fieldName(), "browsePaths");
|
||||
}
|
||||
|
||||
private BoolQueryBuilder getQuery(final Criterion filterCriterion) {
|
||||
final Filter filter = new Filter().setOr(
|
||||
new ConjunctiveCriterionArray(
|
||||
|
||||
@ -9,6 +9,12 @@ import com.linkedin.events.metadata.ChangeType;
|
||||
import com.linkedin.metadata.Constants;
|
||||
import com.linkedin.metadata.boot.UpgradeStep;
|
||||
import com.linkedin.metadata.entity.EntityService;
|
||||
import com.linkedin.metadata.query.filter.Condition;
|
||||
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
|
||||
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
|
||||
import com.linkedin.metadata.query.filter.Criterion;
|
||||
import com.linkedin.metadata.query.filter.CriterionArray;
|
||||
import com.linkedin.metadata.query.filter.Filter;
|
||||
import com.linkedin.metadata.search.ScrollResult;
|
||||
import com.linkedin.metadata.search.SearchEntity;
|
||||
import com.linkedin.metadata.search.SearchService;
|
||||
@ -69,17 +75,39 @@ public class BackfillBrowsePathsV2Step extends UpgradeStep {
|
||||
}
|
||||
|
||||
private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, String scrollId)
|
||||
throws Exception {
|
||||
throws Exception {
|
||||
|
||||
// Condition: has `browsePaths` AND does NOT have `browsePathV2`
|
||||
Criterion missingBrowsePathV2 = new Criterion();
|
||||
missingBrowsePathV2.setCondition(Condition.IS_NULL);
|
||||
missingBrowsePathV2.setField("browsePathV2");
|
||||
// Excludes entities without browsePaths
|
||||
Criterion hasBrowsePathV1 = new Criterion();
|
||||
hasBrowsePathV1.setCondition(Condition.EXISTS);
|
||||
hasBrowsePathV1.setField("browsePaths");
|
||||
|
||||
CriterionArray criterionArray = new CriterionArray();
|
||||
criterionArray.add(missingBrowsePathV2);
|
||||
criterionArray.add(hasBrowsePathV1);
|
||||
|
||||
ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
|
||||
conjunctiveCriterion.setAnd(criterionArray);
|
||||
|
||||
ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
|
||||
conjunctiveCriterionArray.add(conjunctiveCriterion);
|
||||
|
||||
Filter filter = new Filter();
|
||||
filter.setOr(conjunctiveCriterionArray);
|
||||
|
||||
final ScrollResult scrollResult = _searchService.scrollAcrossEntities(
|
||||
ImmutableList.of(entityType),
|
||||
"*",
|
||||
null,
|
||||
null,
|
||||
scrollId,
|
||||
"5m",
|
||||
BATCH_SIZE,
|
||||
null
|
||||
ImmutableList.of(entityType),
|
||||
"*",
|
||||
filter,
|
||||
null,
|
||||
scrollId,
|
||||
"5m",
|
||||
BATCH_SIZE,
|
||||
null
|
||||
);
|
||||
if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) {
|
||||
return null;
|
||||
|
||||
@ -14,6 +14,7 @@ import com.linkedin.entity.EnvelopedAspectMap;
|
||||
import com.linkedin.metadata.Constants;
|
||||
import com.linkedin.metadata.entity.EntityService;
|
||||
import com.linkedin.metadata.models.registry.EntityRegistry;
|
||||
import com.linkedin.metadata.query.filter.Filter;
|
||||
import com.linkedin.metadata.search.ScrollResult;
|
||||
import com.linkedin.metadata.search.SearchEntity;
|
||||
import com.linkedin.metadata.search.SearchEntityArray;
|
||||
@ -88,7 +89,7 @@ public class BackfillBrowsePathsV2StepTest {
|
||||
Mockito.verify(mockSearchService, Mockito.times(9)).scrollAcrossEntities(
|
||||
Mockito.any(),
|
||||
Mockito.eq("*"),
|
||||
Mockito.eq(null),
|
||||
Mockito.any(Filter.class),
|
||||
Mockito.eq(null),
|
||||
Mockito.eq(null),
|
||||
Mockito.eq("5m"),
|
||||
@ -157,7 +158,7 @@ public class BackfillBrowsePathsV2StepTest {
|
||||
Mockito.when(mockSearchService.scrollAcrossEntities(
|
||||
Mockito.eq(ImmutableList.of(ENTITY_TYPES.get(i))),
|
||||
Mockito.eq("*"),
|
||||
Mockito.eq(null),
|
||||
Mockito.any(Filter.class),
|
||||
Mockito.eq(null),
|
||||
Mockito.eq(null),
|
||||
Mockito.eq("5m"),
|
||||
|
||||
@ -7,7 +7,7 @@ describe("run managed ingestion", () => {
|
||||
it("create run managed ingestion source", () => {
|
||||
let number = Math.floor(Math.random() * 100000);
|
||||
let testName = `cypress test source ${number}`
|
||||
let cli_version = "0.9.3.3rc5";
|
||||
let cli_version = "0.10.4.3";
|
||||
cy.login();
|
||||
cy.goToIngestionPage();
|
||||
cy.clickOptionWithText("Create new source");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user