From 55cfb95cad7a505de34c129aa36fec4da072ada1 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:43:37 -0500 Subject: [PATCH] fix(search): additional search size defaults (#13888) --- .../query/request/SearchRequestHandler.java | 7 +- .../metadata/search/query/BrowseDAOTest.java | 10 +- .../request/SearchRequestHandlerTest.java | 226 ++++++++++++++++++ 3 files changed, 236 insertions(+), 7 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 207627b778..2eb409bb62 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -411,7 +411,7 @@ public class SearchRequestHandler extends BaseRequestHandler { @Nonnull SearchResponse searchResponse, Filter filter, int from, - int size) { + @Nullable Integer size) { int totalCount = (int) searchResponse.getHits().getTotalHits().value; Collection resultList = getRestrictedResults(opContext, searchResponse); SearchResultMetadata searchResultMetadata = @@ -421,7 +421,7 @@ public class SearchRequestHandler extends BaseRequestHandler { .setEntities(new SearchEntityArray(resultList)) .setMetadata(searchResultMetadata) .setFrom(from) - .setPageSize(size) + .setPageSize(ConfigUtils.applyLimit(searchServiceConfig, size)) .setNumEntities(totalCount); } @@ -431,9 +431,10 @@ public class SearchRequestHandler extends BaseRequestHandler { @Nonnull SearchResponse searchResponse, Filter filter, @Nullable String keepAlive, - int size, + @Nullable Integer size, boolean supportsPointInTime) { int totalCount = (int) searchResponse.getHits().getTotalHits().value; + size = ConfigUtils.applyLimit(searchServiceConfig, size); Collection resultList = getRestrictedResults(opContext, searchResponse); SearchResultMetadata searchResultMetadata = extractSearchResultMetadata(opContext, searchResponse, filter); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index ffe1d9379b..7b69bf3339 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -163,10 +163,12 @@ public class BrowseDAOTest extends AbstractTestNGSpringContextTests { TEST_ES_SEARCH_CONFIG, customSearchConfiguration, QueryFilterRewriteChain.EMPTY, - TEST_SEARCH_SERVICE_CONFIG.setLimit( - new LimitConfig() - .setResults( - new ResultsLimitConfig().setMax(15).setApiDefault(15).setStrict(false)))); + TEST_SEARCH_SERVICE_CONFIG.toBuilder() + .limit( + new LimitConfig() + .setResults( + new ResultsLimitConfig().setMax(15).setApiDefault(15).setStrict(false))) + .build()); // Test browse with size that exceeds limit int requestedSize = 20; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index a5e364cba5..9ebf498870 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -41,6 +41,8 @@ import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import io.datahubproject.metadata.context.OperationContext; @@ -58,13 +60,17 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.lucene.search.TotalHits; import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; @@ -1194,6 +1200,226 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { assertEquals(sourceBuilder.size(), 25); } + @Test + public void testExtractResultWithNullSize() { + // Create a mock SearchResponse + SearchResponse mockResponse = mock(SearchResponse.class); + SearchHits mockHits = mock(SearchHits.class); + + when(mockResponse.getHits()).thenReturn(mockHits); + when(mockHits.getTotalHits()).thenReturn(new TotalHits(100L, TotalHits.Relation.EQUAL_TO)); + when(mockHits.getHits()).thenReturn(new SearchHit[0]); + when(mockResponse.getAggregations()).thenReturn(null); + when(mockResponse.getSuggest()).thenReturn(null); + + SearchRequestHandler handler = + SearchRequestHandler.getBuilder( + operationContext, + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY, + TEST_SEARCH_SERVICE_CONFIG); + + // Test with null size + SearchResult result = handler.extractResult(operationContext, mockResponse, null, 0, null); + + // Should use the default from the service config + assertEquals( + result.getPageSize().intValue(), + TEST_SEARCH_SERVICE_CONFIG.getLimit().getResults().getApiDefault()); + assertEquals(result.getFrom().intValue(), 0); + assertEquals(result.getNumEntities().intValue(), 100); + } + + @Test + public void testExtractResultWithLimitConfiguration() { + // Create a custom SearchServiceConfiguration with specific limits + SearchServiceConfiguration limitConfig = + TEST_SEARCH_SERVICE_CONFIG.toBuilder() + .limit( + LimitConfig.builder() + .results( + ResultsLimitConfig.builder().max(50).apiDefault(30).strict(false).build()) + .build()) + .build(); + + SearchRequestHandler limitHandler = + SearchRequestHandler.getBuilder( + operationContext, + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY, + limitConfig); + + // Create a mock SearchResponse + SearchResponse mockResponse = mock(SearchResponse.class); + SearchHits mockHits = mock(SearchHits.class); + + when(mockResponse.getHits()).thenReturn(mockHits); + when(mockHits.getTotalHits()).thenReturn(new TotalHits(200L, TotalHits.Relation.EQUAL_TO)); + when(mockHits.getHits()).thenReturn(new SearchHit[0]); + when(mockResponse.getAggregations()).thenReturn(null); + when(mockResponse.getSuggest()).thenReturn(null); + + // Test with size above limit + SearchResult result = + limitHandler.extractResult( + operationContext, mockResponse, null, 0, 100); // Requesting 100, but max is 50 + + // Should be limited to 30, applying default + assertEquals(result.getPageSize().intValue(), 30); + + // Test with size below limit + result = limitHandler.extractResult(operationContext, mockResponse, null, 0, 25); + + // Should use the requested size + assertEquals(result.getPageSize().intValue(), 25); + + // Test with null size - should use API default + result = limitHandler.extractResult(operationContext, mockResponse, null, 0, null); + + // Should use the API default (30) + assertEquals(result.getPageSize().intValue(), 30); + } + + @Test + public void testExtractScrollResultWithNullSize() { + // Create a mock SearchResponse + SearchResponse mockResponse = mock(SearchResponse.class); + SearchHits mockHits = mock(SearchHits.class); + + when(mockResponse.getHits()).thenReturn(mockHits); + when(mockHits.getTotalHits()).thenReturn(new TotalHits(100L, TotalHits.Relation.EQUAL_TO)); + when(mockHits.getHits()).thenReturn(new SearchHit[0]); + when(mockResponse.getAggregations()).thenReturn(null); + when(mockResponse.getSuggest()).thenReturn(null); + when(mockResponse.pointInTimeId()).thenReturn("test-pit-id"); + + SearchRequestHandler handler = + SearchRequestHandler.getBuilder( + operationContext, + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY, + TEST_SEARCH_SERVICE_CONFIG); + + // Test with null size + ScrollResult result = + handler.extractScrollResult(operationContext, mockResponse, null, "5m", null, true); + + // Should use the default from the service config default + assertEquals( + result.getPageSize().intValue(), + TEST_SEARCH_SERVICE_CONFIG.getLimit().getResults().getApiDefault()); + assertEquals(result.getNumEntities().intValue(), 100); + } + + @Test + public void testExtractScrollResultWithLimitConfiguration() { + // Create a custom SearchServiceConfiguration with specific limits + SearchServiceConfiguration limitConfig = + TEST_SEARCH_SERVICE_CONFIG.toBuilder() + .limit( + LimitConfig.builder() + .results( + ResultsLimitConfig.builder().max(40).apiDefault(40).strict(false).build()) + .build()) + .build(); + + SearchRequestHandler limitHandler = + SearchRequestHandler.getBuilder( + operationContext, + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY, + limitConfig); + + // Test with size above limit + ScrollResult result = verifyScrollResultSize(limitHandler, 40, 80, true); + + // Should be limited to 40 as the default + assertEquals(result.getPageSize().intValue(), 40); + assertNotNull(result.getScrollId()); // Should have next scroll ID since we have full page + + // Test with size below limit - partial page + result = verifyScrollResultSize(limitHandler, 15, 20, false); + + // Should use the requested size + assertEquals(result.getPageSize().intValue(), 20); + assertFalse(result.hasScrollId()); // No next scroll ID since results < page size + + // Test with null size - should use API default + result = verifyScrollResultSize(limitHandler, 40, null, true); + + // Should use the API default (40) + assertEquals(result.getPageSize().intValue(), 40); + assertNotNull(result.getScrollId()); // Should have next scroll ID + } + + @Test + public void testExtractScrollResultPaginationLogic() { + SearchRequestHandler handler = + SearchRequestHandler.getBuilder( + operationContext, + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY, + TEST_SEARCH_SERVICE_CONFIG); + + // Test when results equal page size - should have scroll ID + ScrollResult result = verifyScrollResultSize(handler, 10, 10, true); + + assertEquals(result.getPageSize().intValue(), 10); + assertNotNull(result.getScrollId()); + + // Test when results less than page size - should NOT have scroll ID + result = verifyScrollResultSize(handler, 5, 10, false); + + assertEquals(result.getPageSize().intValue(), 10); + assertFalse(result.hasScrollId()); + } + + // Helper method to create scroll results with specific sizes + private ScrollResult verifyScrollResultSize( + SearchRequestHandler handler, + int actualResults, + Integer requestedSize, + boolean expectScrollId) { + + SearchResponse mockResponse = mock(SearchResponse.class); + SearchHits mockHits = mock(SearchHits.class); + + when(mockResponse.getHits()).thenReturn(mockHits); + when(mockHits.getTotalHits()).thenReturn(new TotalHits(100L, TotalHits.Relation.EQUAL_TO)); + when(mockResponse.getAggregations()).thenReturn(null); + when(mockResponse.getSuggest()).thenReturn(null); + when(mockResponse.pointInTimeId()).thenReturn("test-pit-id"); + + // Create array of mock hits + SearchHit[] hits = new SearchHit[actualResults]; + for (int i = 0; i < actualResults; i++) { + SearchHit mockHit = mock(SearchHit.class); + when(mockHit.getSourceAsMap()) + .thenReturn( + ImmutableMap.of( + "urn", "urn:li:dataset:(urn:li:dataPlatform:hdfs,test" + i + ",PROD)")); + when(mockHit.getScore()).thenReturn(1.0f); + when(mockHit.getHighlightFields()).thenReturn(ImmutableMap.of()); + when(mockHit.getMatchedQueries()).thenReturn(new String[0]); + when(mockHit.getSortValues()).thenReturn(new Object[] {"sortValue" + i}); + hits[i] = mockHit; + } + when(mockHits.getHits()).thenReturn(hits); + + return handler.extractScrollResult( + operationContext, mockResponse, null, "5m", requestedSize, true); + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { return getQuery(filterCriterion, TestEntitySpecBuilder.getSpec(), true); }