feat(browsepathv2): enable incremental update browsepath (#8354)

This commit is contained in:
david-leifker 2023-07-06 10:42:07 -05:00 committed by GitHub
parent 393b5a3039
commit a5f420ffd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 53 deletions

View File

@ -437,7 +437,7 @@ public class ESBrowseDAO {
queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal));
queryBuilder.must(SearchRequestHandler.getFilterQuery(filter));
queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter));
return queryBuilder;
}

View File

@ -191,7 +191,7 @@ public class SearchRequestHandler {
BoolQueryBuilder filterQuery = getFilterQuery(filter);
searchSourceBuilder.query(QueryBuilders.boolQuery()
.must(getQuery(input, finalSearchFlags.isFulltext()))
.must(filterQuery));
.filter(filterQuery));
if (!finalSearchFlags.isSkipAggregates()) {
_aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation);
}
@ -231,7 +231,7 @@ public class SearchRequestHandler {
searchSourceBuilder.fetchSource("urn", null);
BoolQueryBuilder filterQuery = getFilterQuery(filter);
searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).must(filterQuery));
searchSourceBuilder.query(QueryBuilders.boolQuery().must(getQuery(input, finalSearchFlags.isFulltext())).filter(filterQuery));
_aggregationQueryBuilder.getAggregations().forEach(searchSourceBuilder::aggregation);
searchSourceBuilder.highlighter(getHighlights());
ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion);

View File

@ -109,10 +109,11 @@ public class ESUtils {
boolean isTimeseries) {
final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder();
conjunctiveCriterion.getAnd().forEach(criterion -> {
if (!criterion.getValue().trim().isEmpty() || criterion.hasValues()
|| criterion.getCondition() == Condition.IS_NULL) {
if (Set.of(Condition.EXISTS, Condition.IS_NULL).contains(criterion.getCondition())
|| !criterion.getValue().trim().isEmpty() || criterion.hasValues()) {
if (!criterion.isNegated()) {
andQueryBuilder.must(getQueryBuilderFromCriterion(criterion, isTimeseries));
// `filter` instead of `must` (enables caching and bypasses scoring)
andQueryBuilder.filter(getQueryBuilderFromCriterion(criterion, isTimeseries));
} else {
andQueryBuilder.mustNot(getQueryBuilderFromCriterion(criterion, isTimeseries));
}
@ -261,8 +262,12 @@ public class ESUtils {
Criterion criterionToQuery = new Criterion();
criterionToQuery.setCondition(criterion.getCondition());
criterionToQuery.setNegated(criterion.isNegated());
criterionToQuery.setValue(criterion.getValue());
criterionToQuery.setValues(criterion.getValues());
if (criterion.hasValues()) {
criterionToQuery.setValues(criterion.getValues());
}
if (criterion.hasValue()) {
criterionToQuery.setValue(criterion.getValue());
}
criterionToQuery.setField(toKeywordField(field, isTimeseries));
orQueryBuilder.should(getQueryBuilderFromCriterionForSingleField(criterionToQuery, isTimeseries));
}
@ -274,30 +279,32 @@ public class ESUtils {
final Condition condition = criterion.getCondition();
final String fieldName = toFacetField(criterion.getField());
if (condition == Condition.EQUAL) {
return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries);
} else if (condition == Condition.IS_NULL) {
if (condition == Condition.IS_NULL) {
return QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(criterion.getField())).queryName(fieldName);
} else if (condition == Condition.EXISTS) {
return QueryBuilders.boolQuery().must(QueryBuilders.existsQuery(criterion.getField())).queryName(fieldName);
// TODO: Support multi-match on the following operators (using new 'values' field)
} else if (condition == Condition.GREATER_THAN) {
return QueryBuilders.rangeQuery(criterion.getField()).gt(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) {
return QueryBuilders.rangeQuery(criterion.getField()).gte(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.LESS_THAN) {
return QueryBuilders.rangeQuery(criterion.getField()).lt(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) {
return QueryBuilders.rangeQuery(criterion.getField()).lte(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.CONTAIN) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
} else if (condition == Condition.START_WITH) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
} else if (condition == Condition.END_WITH) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())).queryName(fieldName);
} else if (criterion.hasValues() || criterion.hasValue()) {
if (condition == Condition.EQUAL) {
return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries);
// TODO: Support multi-match on the following operators (using new 'values' field)
} else if (condition == Condition.GREATER_THAN) {
return QueryBuilders.rangeQuery(criterion.getField()).gt(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) {
return QueryBuilders.rangeQuery(criterion.getField()).gte(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.LESS_THAN) {
return QueryBuilders.rangeQuery(criterion.getField()).lt(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.LESS_THAN_OR_EQUAL_TO) {
return QueryBuilders.rangeQuery(criterion.getField()).lte(criterion.getValue().trim()).queryName(fieldName);
} else if (condition == Condition.CONTAIN) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
} else if (condition == Condition.START_WITH) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*").queryName(fieldName);
} else if (condition == Condition.END_WITH) {
return QueryBuilders.wildcardQuery(toKeywordField(criterion.getField(), isTimeseries),
"*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())).queryName(fieldName);
}
}
throw new UnsupportedOperationException("Unsupported condition: " + condition);
}

View File

@ -29,6 +29,7 @@ import com.linkedin.metadata.query.filter.CriterionArray;
import com.linkedin.metadata.query.filter.Filter;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.ExistsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.TermsQueryBuilder;
@ -215,7 +216,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
}
private void testFilterQuery(BoolQueryBuilder testQuery) {
Optional<MatchQueryBuilder> mustNotHaveRemovedCondition = testQuery.must()
Optional<MatchQueryBuilder> mustNotHaveRemovedCondition = testQuery.filter()
.stream()
.filter(or -> or instanceof BoolQueryBuilder)
.map(or -> (BoolQueryBuilder) or)
@ -292,13 +293,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
// bool -> must -> [bool] -> should -> [bool] -> must -> [bool] -> should -> [terms]
List<TermsQueryBuilder> termsQueryBuilders = testQuery.must()
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [terms]
List<TermsQueryBuilder> termsQueryBuilders = testQuery.filter()
.stream()
.filter(or -> or instanceof BoolQueryBuilder)
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
.filter(should -> should instanceof BoolQueryBuilder)
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
.filter(must -> must instanceof BoolQueryBuilder)
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
.filter(should -> should instanceof TermsQueryBuilder)
@ -342,12 +343,12 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [bool] -> should -> [match]
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.must()
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.filter()
.stream()
.filter(or -> or instanceof BoolQueryBuilder)
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
.filter(should -> should instanceof BoolQueryBuilder)
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
.filter(must -> must instanceof BoolQueryBuilder)
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
.filter(should -> should instanceof BoolQueryBuilder)
@ -381,13 +382,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
// bool -> must -> [bool] -> should -> [bool] -> must -> [bool] -> should -> [match]
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.must()
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [match]
List<MultiMatchQueryBuilder> matchQueryBuilders = testQuery.filter()
.stream()
.filter(or -> or instanceof BoolQueryBuilder)
.flatMap(or -> ((BoolQueryBuilder) or).should().stream())
.filter(should -> should instanceof BoolQueryBuilder)
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
.filter(must -> must instanceof BoolQueryBuilder)
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
.filter(should -> should instanceof MultiMatchQueryBuilder)
@ -414,13 +415,13 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
final BoolQueryBuilder testQuery = getQuery(filterCriterion);
// bool -> must -> [bool] -> should -> [bool] -> must -> [terms]
List<TermsQueryBuilder> termsQueryBuilders = testQuery.must()
// bool -> filter -> [bool] -> should -> [bool] -> filter -> [terms]
List<TermsQueryBuilder> termsQueryBuilders = testQuery.filter()
.stream()
.filter(must -> must instanceof BoolQueryBuilder)
.flatMap(must -> ((BoolQueryBuilder) must).should().stream())
.filter(should -> should instanceof BoolQueryBuilder)
.flatMap(should -> ((BoolQueryBuilder) should).must().stream())
.flatMap(should -> ((BoolQueryBuilder) should).filter().stream())
.filter(must -> must instanceof TermsQueryBuilder)
.map(must -> (TermsQueryBuilder) must)
.collect(Collectors.toList());
@ -439,6 +440,44 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
assertTrue(values.contains("bigquery"));
}
@Test
public void testBrowsePathQueryFilter() {
  // Filter under test: entity has `browsePaths` AND does NOT have `browsePathV2`.
  final Criterion browsePathV2Absent = new Criterion()
      .setCondition(Condition.IS_NULL)
      .setField("browsePathV2");
  // EXISTS on `browsePaths` excludes entities that never had a v1 browse path.
  final Criterion browsePathV1Present = new Criterion()
      .setCondition(Condition.EXISTS)
      .setField("browsePaths");

  // Both criteria go into a single AND clause; order matters for the index-based assertions below.
  final CriterionArray andCriteria = new CriterionArray();
  andCriteria.add(browsePathV2Absent);
  andCriteria.add(browsePathV1Present);

  final ConjunctiveCriterionArray orClauses = new ConjunctiveCriterionArray();
  orClauses.add(new ConjunctiveCriterion().setAnd(andCriteria));

  final BoolQueryBuilder filterQuery = SearchRequestHandler.getFilterQuery(new Filter().setOr(orClauses));

  // Expected shape: bool -> should -> [bool] -> filter -> [bool(mustNot exists), bool(must exists)]
  assertEquals(filterQuery.should().size(), 1);

  final BoolQueryBuilder andClause = (BoolQueryBuilder) filterQuery.should().get(0);
  assertEquals(andClause.filter().size(), 2);

  // First criterion (IS_NULL) is expressed as a negated exists query.
  final BoolQueryBuilder isNullClause = (BoolQueryBuilder) andClause.filter().get(0);
  final ExistsQueryBuilder negatedExists = (ExistsQueryBuilder) isNullClause.mustNot().get(0);
  assertEquals(negatedExists.fieldName(), "browsePathV2");

  // Second criterion (EXISTS) is expressed as a plain exists query.
  final BoolQueryBuilder existsClause = (BoolQueryBuilder) andClause.filter().get(1);
  final ExistsQueryBuilder requiredExists = (ExistsQueryBuilder) existsClause.must().get(0);
  assertEquals(requiredExists.fieldName(), "browsePaths");
}
private BoolQueryBuilder getQuery(final Criterion filterCriterion) {
final Filter filter = new Filter().setOr(
new ConjunctiveCriterionArray(

View File

@ -9,6 +9,12 @@ import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.boot.UpgradeStep;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
import com.linkedin.metadata.query.filter.Criterion;
import com.linkedin.metadata.query.filter.CriterionArray;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchEntity;
import com.linkedin.metadata.search.SearchService;
@ -69,17 +75,39 @@ public class BackfillBrowsePathsV2Step extends UpgradeStep {
}
private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, String scrollId)
throws Exception {
throws Exception {
// Condition: has `browsePaths` AND does NOT have `browsePathV2`
Criterion missingBrowsePathV2 = new Criterion();
missingBrowsePathV2.setCondition(Condition.IS_NULL);
missingBrowsePathV2.setField("browsePathV2");
// Excludes entities without browsePaths
Criterion hasBrowsePathV1 = new Criterion();
hasBrowsePathV1.setCondition(Condition.EXISTS);
hasBrowsePathV1.setField("browsePaths");
CriterionArray criterionArray = new CriterionArray();
criterionArray.add(missingBrowsePathV2);
criterionArray.add(hasBrowsePathV1);
ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
conjunctiveCriterion.setAnd(criterionArray);
ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
conjunctiveCriterionArray.add(conjunctiveCriterion);
Filter filter = new Filter();
filter.setOr(conjunctiveCriterionArray);
final ScrollResult scrollResult = _searchService.scrollAcrossEntities(
ImmutableList.of(entityType),
"*",
null,
null,
scrollId,
"5m",
BATCH_SIZE,
null
ImmutableList.of(entityType),
"*",
filter,
null,
scrollId,
"5m",
BATCH_SIZE,
null
);
if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) {
return null;

View File

@ -14,6 +14,7 @@ import com.linkedin.entity.EnvelopedAspectMap;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.search.ScrollResult;
import com.linkedin.metadata.search.SearchEntity;
import com.linkedin.metadata.search.SearchEntityArray;
@ -88,7 +89,7 @@ public class BackfillBrowsePathsV2StepTest {
Mockito.verify(mockSearchService, Mockito.times(9)).scrollAcrossEntities(
Mockito.any(),
Mockito.eq("*"),
Mockito.eq(null),
Mockito.any(Filter.class),
Mockito.eq(null),
Mockito.eq(null),
Mockito.eq("5m"),
@ -157,7 +158,7 @@ public class BackfillBrowsePathsV2StepTest {
Mockito.when(mockSearchService.scrollAcrossEntities(
Mockito.eq(ImmutableList.of(ENTITY_TYPES.get(i))),
Mockito.eq("*"),
Mockito.eq(null),
Mockito.any(Filter.class),
Mockito.eq(null),
Mockito.eq(null),
Mockito.eq("5m"),

View File

@ -7,7 +7,7 @@ describe("run managed ingestion", () => {
it("create run managed ingestion source", () => {
let number = Math.floor(Math.random() * 100000);
let testName = `cypress test source ${number}`
let cli_version = "0.9.3.3rc5";
let cli_version = "0.10.4.3";
cy.login();
cy.goToIngestionPage();
cy.clickOptionWithText("Create new source");