fix(search): Filter out "removed" entities from autocomplete and analytics (#2781)

This commit is contained in:
Dexter Lee 2021-06-29 17:56:09 -07:00 committed by GitHub
parent 69d9cf46de
commit 6fee59ebac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 23 deletions

View File

@ -1,6 +1,7 @@
package react.analytics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.metadata.dao.exception.ESQueryException;
import graphql.BarSegment;
import graphql.DateInterval;
@ -64,13 +65,12 @@ public class AnalyticsService {
public List<NamedLine> getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity,
Optional<String> dimension, // Length 1 for now
Map<String, List<String>> filters, Optional<String> uniqueOn) {
_logger.debug(
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,",
indexName, dateRange, granularity, dimension)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, Optional.of(dateRange));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange));
AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM)
.field("timestamp")
@ -123,8 +123,7 @@ public class AnalyticsService {
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));
assert (dimensions.size() == 1 || dimensions.size() == 2);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
AggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0)).missing(NA);
if (dimensions.size() == 2) {
@ -170,13 +169,12 @@ public class AnalyticsService {
public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRange, String groupBy,
Map<String, List<String>> filters, Optional<String> uniqueOn, int maxRows) {
_logger.debug(
String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s",
indexName, dateRange, groupBy)
+ String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(groupBy).size(maxRows);
if (uniqueOn.isPresent()) {
@ -201,8 +199,8 @@ public class AnalyticsService {
}
public int getHighlights(String indexName, Optional<DateRange> dateRange, Map<String, List<String>> filters,
Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, dateRange);
Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn) {
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);
uniqueOn.ifPresent(s -> filteredAgg.subAggregation(getUniqueQuery(s)));
SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg);
@ -239,9 +237,11 @@ public class AnalyticsService {
}
}
/**
 * Builds the filter aggregation that the analytics queries in this class aggregate under.
 *
 * <p>NOTE(review): this listing looks like a diff rendering with the +/- markers stripped, so a
 * superseded line appears directly before its replacement (the two-argument signature and the
 * {@code filters.forEach} line look like the pre-change versions) - confirm against the real file.
 *
 * @param mustFilters field name to values; each entry adds a {@code must} terms query
 * @param mustNotFilters field name to values; each entry adds a {@code mustNot} terms query
 * @param dateRange when present, adds a {@code must} clause built by {@code dateRangeQuery}
 * @return a filter aggregation named {@code FILTERED} wrapping the assembled bool query
 */
// Presumably the pre-change signature (single inclusion-filter map) - TODO confirm.
private AggregationBuilder getFilteredAggregation(Map<String, List<String>> filters, Optional<DateRange> dateRange) {
// Signature taking separate inclusion and exclusion filter maps.
private AggregationBuilder getFilteredAggregation(Map<String, List<String>> mustFilters,
Map<String, List<String>> mustNotFilters, Optional<DateRange> dateRange) {
BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery();
// Presumably the pre-change line, reading the old `filters` parameter - TODO confirm.
filters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
// One terms query per field: added as must for mustFilters and as mustNot for mustNotFilters.
mustFilters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values)));
mustNotFilters.forEach((key, values) -> filteredQuery.mustNot(QueryBuilders.termsQuery(key, values)));
// The date restriction is optional; an empty Optional leaves the query without a date clause.
dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range)));
return AggregationBuilders.filter(FILTERED, filteredQuery);
}

View File

@ -49,11 +49,11 @@ public final class GetHighlightsResolver implements DataFetcher<List<Highlight>>
int weeklyActiveUsers =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRange),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));
int weeklyActiveUsersLastWeek =
_analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRangeLastWeek),
ImmutableMap.of(), Optional.of("browserId"));
ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId"));
String bodyText = "";
if (weeklyActiveUsersLastWeek > 0) {
@ -79,10 +79,11 @@ public final class GetHighlightsResolver implements DataFetcher<List<Highlight>>
}
private Highlight getEntityMetadataStats(String title, String index) {
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(), Optional.empty());
int numEntities = _analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
int numEntitiesWithOwners =
_analyticsService.getHighlights(index, Optional.empty(), ImmutableMap.of("hasOwners", ImmutableList.of("true")),
Optional.empty());
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty());
String bodyText = "";
if (numEntities > 0) {
double percentChange = 100.0 * numEntitiesWithOwners / numEntities;

View File

@ -21,6 +21,7 @@ import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
@ -65,13 +66,17 @@ public class AutocompleteRequestHandler {
}
/**
 * Builds the autocomplete query: a bool query holding a multi-match over the autocomplete fields
 * and a {@code mustNot} clause on documents whose {@code removed} field matches {@code true}.
 *
 * <p>NOTE(review): this listing looks like a diff rendering with the +/- markers stripped; the
 * {@code queryBuilder} lines ending in {@code return queryBuilder;} look like the pre-change body
 * shown next to its replacement - confirm against the real file.
 *
 * @param query the text to match
 * @param field passed to {@code getAutocompleteFields} to choose the fields searched; may be null
 * @return the bool query combining the multi-match and the {@code removed} exclusion
 */
private QueryBuilder getQuery(@Nonnull String query, @Nullable String field) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
// Search for exact matches with higher boost and ngram matches
// Each autocomplete field is searched both as-is and through its ".ngram" sub-field.
List<String> fieldNames = getAutocompleteFields(field).stream()
.flatMap(fieldName -> Stream.of(fieldName, fieldName + ".ngram"))
.collect(Collectors.toList());
// Presumably the pre-change body, which returned the bare multi-match query - TODO confirm.
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
queryBuilder.analyzer(ANALYZER);
return queryBuilder;
MultiMatchQueryBuilder autocompleteQueryBuilder =
QueryBuilders.multiMatchQuery(query, fieldNames.toArray(new String[0]));
autocompleteQueryBuilder.analyzer(ANALYZER);
// NOTE(review): the multi-match is attached as a should clause. With no must/filter clause,
// Elasticsearch is expected to default minimum_should_match to 1, which would still make the
// text match required - confirm for the Elasticsearch version in use.
finalQuery.should(autocompleteQueryBuilder);
// Exclude documents whose "removed" field matches true.
finalQuery.mustNot(QueryBuilders.matchQuery("removed", true));
return finalQuery;
}
// Get HighlightBuilder to highlight the matched field

View File

@ -4,6 +4,8 @@ import com.linkedin.metadata.TestEntitySpecBuilder;
import java.util.List;
import java.util.Map;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
@ -22,11 +24,17 @@ public class AutocompleteRequestHandlerTest {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", null, null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("keyPart1"));
assertTrue(queryFields.containsKey("keyPart1.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
assertEquals(query.mustNot().size(), 1);
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);
@ -40,11 +48,16 @@ public class AutocompleteRequestHandlerTest {
SearchRequest autocompleteRequest = handler.getSearchRequest("input", "field", null, 10);
SearchSourceBuilder sourceBuilder = autocompleteRequest.source();
assertEquals(sourceBuilder.size(), 10);
MultiMatchQueryBuilder queryBuilder = (MultiMatchQueryBuilder) sourceBuilder.query();
Map<String, Float> queryFields = queryBuilder.fields();
BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query();
assertEquals(query.should().size(), 1);
MultiMatchQueryBuilder matchQuery = (MultiMatchQueryBuilder) query.should().get(0);
Map<String, Float> queryFields = matchQuery.fields();
assertTrue(queryFields.containsKey("field"));
assertTrue(queryFields.containsKey("field.ngram"));
assertEquals(queryBuilder.analyzer(), "word_delimited");
assertEquals(matchQuery.analyzer(), "word_delimited");
MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0);
assertEquals(removedFilter.fieldName(), "removed");
assertEquals(removedFilter.value(), true);
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<HighlightBuilder.Field> highlightedFields = highlightBuilder.fields();
assertEquals(highlightedFields.size(), 2);