fix(search): respect the search flags term bucket size (#10130)

This commit is contained in:
david-leifker 2024-03-25 17:39:16 -05:00 committed by GitHub
parent 5195d3a952
commit fc03a1c3ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 39 additions and 15 deletions

View File

@ -20,6 +20,7 @@ import com.linkedin.metadata.search.FilterValueArray;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.util.Pair;
import io.datahubproject.metadata.context.OperationContext;
import io.opentelemetry.extension.annotations.WithSpan;
import java.util.ArrayList;
import java.util.Arrays;
@ -73,15 +74,16 @@ public class AggregationQueryBuilder {
}
/** Get the set of default aggregations, across all facets. */
public List<AggregationBuilder> getAggregations() {
return getAggregations(null);
public List<AggregationBuilder> getAggregations(@Nonnull OperationContext opContext) {
return getAggregations(opContext, null);
}
/**
* Get aggregations for a search request for the given facets provided, and if none are provided,
* then get aggregations for all.
*/
public List<AggregationBuilder> getAggregations(@Nullable List<String> facets) {
public List<AggregationBuilder> getAggregations(
@Nonnull OperationContext opContext, @Nullable List<String> facets) {
final Set<String> facetsToAggregate;
if (facets != null) {
facetsToAggregate =
@ -90,7 +92,7 @@ public class AggregationQueryBuilder {
facetsToAggregate = defaultFacetFields;
}
return facetsToAggregate.stream()
.map(this::facetToAggregationBuilder)
.map(f -> facetToAggregationBuilder(opContext, f))
.collect(Collectors.toList());
}
@ -129,9 +131,14 @@ public class AggregationQueryBuilder {
return isValid;
}
private AggregationBuilder facetToAggregationBuilder(final String inputFacet) {
private AggregationBuilder facetToAggregationBuilder(
@Nonnull OperationContext opContext, final String inputFacet) {
List<String> facets = List.of(inputFacet.split(AGGREGATION_SEPARATOR_CHAR));
AggregationBuilder lastAggBuilder = null;
int maxTermBuckets =
Math.min(
opContext.getSearchContext().getSearchFlags().getMaxAggValues(),
configs.getMaxTermBucketSize());
for (int i = facets.size() - 1; i >= 0; i--) {
String facet = facets.get(i);
if (facet.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
@ -163,11 +170,11 @@ public class AggregationQueryBuilder {
facet.equalsIgnoreCase(INDEX_VIRTUAL_FIELD)
? AggregationBuilders.terms(inputFacet)
.field(getAggregationField(ES_INDEX_FIELD))
.size(configs.getMaxTermBucketSize())
.size(maxTermBuckets)
.minDocCount(0)
: AggregationBuilders.terms(inputFacet)
.field(getAggregationField(facet))
.size(configs.getMaxTermBucketSize());
.size(maxTermBuckets);
}
if (lastAggBuilder != null) {
aggBuilder = aggBuilder.subAggregation(lastAggBuilder);

View File

@ -219,7 +219,9 @@ public class SearchRequestHandler {
.must(getQuery(input, Boolean.TRUE.equals(searchFlags.isFulltext())))
.filter(filterQuery));
if (Boolean.FALSE.equals(searchFlags.isSkipAggregates())) {
aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation);
aggregationQueryBuilder
.getAggregations(opContext, facets)
.forEach(searchSourceBuilder::aggregation);
}
if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) {
searchSourceBuilder.highlighter(highlights);
@ -276,7 +278,9 @@ public class SearchRequestHandler {
.must(getQuery(input, Boolean.TRUE.equals(searchFlags.isFulltext())))
.filter(filterQuery));
if (Boolean.FALSE.equals(searchFlags.isSkipAggregates())) {
aggregationQueryBuilder.getAggregations(facets).forEach(searchSourceBuilder::aggregation);
aggregationQueryBuilder
.getAggregations(opContext, facets)
.forEach(searchSourceBuilder::aggregation);
}
if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) {
searchSourceBuilder.highlighter(highlights);

View File

@ -15,6 +15,7 @@ import com.linkedin.metadata.models.annotation.SearchableAnnotation;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder;
import com.linkedin.r2.RemoteInvocationException;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.List;
@ -67,7 +68,8 @@ public class AggregationQueryBuilderTest {
ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)),
aspectRetriever);
List<AggregationBuilder> aggs = builder.getAggregations();
List<AggregationBuilder> aggs =
builder.getAggregations(TestOperationContexts.systemContextNoSearchAuthorization());
Assert.assertTrue(aggs.stream().anyMatch(agg -> agg.getName().equals("hasTest")));
}
@ -101,7 +103,8 @@ public class AggregationQueryBuilderTest {
ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)),
aspectRetriever);
List<AggregationBuilder> aggs = builder.getAggregations();
List<AggregationBuilder> aggs =
builder.getAggregations(TestOperationContexts.systemContextNoSearchAuthorization());
Assert.assertTrue(aggs.stream().anyMatch(agg -> agg.getName().equals("test")));
}
@ -153,13 +156,18 @@ public class AggregationQueryBuilderTest {
// Case 1: Ask for fields that should exist.
List<AggregationBuilder> aggs =
builder.getAggregations(ImmutableList.of("test1", "test2", "hasTest1"));
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("test1", "test2", "hasTest1"));
Assert.assertEquals(aggs.size(), 3);
Set<String> facets = aggs.stream().map(AggregationBuilder::getName).collect(Collectors.toSet());
Assert.assertEquals(ImmutableSet.of("test1", "test2", "hasTest1"), facets);
// Case 2: Ask for fields that should NOT exist.
aggs = builder.getAggregations(ImmutableList.of("hasTest2"));
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of("hasTest2"));
Assert.assertEquals(aggs.size(), 0);
}
@ -173,7 +181,9 @@ public class AggregationQueryBuilderTest {
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()), aspectRetriever);
List<AggregationBuilder> aggs =
builder.getAggregations(List.of("structuredProperties.ab.fgh.ten"));
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
List.of("structuredProperties.ab.fgh.ten"));
Assert.assertEquals(aggs.size(), 1);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
@ -184,6 +194,7 @@ public class AggregationQueryBuilderTest {
// Two structured properties
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
List.of("structuredProperties.ab.fgh.ten", "structuredProperties.hello"));
Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
@ -241,6 +252,7 @@ public class AggregationQueryBuilderTest {
// Aggregate over fields and structured properties
List<AggregationBuilder> aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(),
ImmutableList.of(
"test1",
"test2",
@ -291,7 +303,8 @@ public class AggregationQueryBuilderTest {
ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation)),
aspectRetriever);
List<AggregationBuilder> aggs = builder.getAggregations();
List<AggregationBuilder> aggs =
builder.getAggregations(TestOperationContexts.systemContextNoSearchAuthorization());
Assert.assertTrue(aggs.stream().anyMatch(agg -> agg.getName().equals("hasTest")));
Assert.assertTrue(