fix(homepage): make entity counts execute in parallel and make cache configurable (#7249)

This commit is contained in:
RyanHolstien 2023-02-14 07:42:39 -06:00 committed by GitHub
parent 21d9cb62d8
commit bc61d1e446
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 125 additions and 27 deletions

View File

@ -0,0 +1,10 @@
package com.linkedin.metadata.config;
import lombok.Data;
@Data
public class CacheConfiguration {
PrimaryCacheConfiguration primary;
HomepageCacheConfiguration homepage;
}

View File

@ -0,0 +1,9 @@
package com.linkedin.metadata.config;
import lombok.Data;
@Data
public class EntityDocCountCacheConfiguration {
long ttlSeconds;
}

View File

@ -0,0 +1,9 @@
package com.linkedin.metadata.config;
import lombok.Data;
@Data
public class HomepageCacheConfiguration {
EntityDocCountCacheConfiguration entityCounts;
}

View File

@ -0,0 +1,10 @@
package com.linkedin.metadata.config;
import lombok.Data;
@Data
public class PrimaryCacheConfiguration {
long ttlSeconds;
long maxSize;
}

View File

@ -3,6 +3,7 @@ package com.linkedin.metadata.search.aggregator;
import com.codahale.metrics.Timer; import com.codahale.metrics.Timer;
import com.linkedin.data.template.GetMode; import com.linkedin.data.template.GetMode;
import com.linkedin.data.template.LongMap; import com.linkedin.data.template.LongMap;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.Filter;
@ -54,11 +55,12 @@ public class AllEntitiesSearchAggregator {
EntityRegistry entityRegistry, EntityRegistry entityRegistry,
EntitySearchService entitySearchService, EntitySearchService entitySearchService,
CachingEntitySearchService cachingEntitySearchService, CachingEntitySearchService cachingEntitySearchService,
SearchRanker searchRanker) { SearchRanker searchRanker,
EntityDocCountCacheConfiguration entityDocCountCacheConfiguration) {
_entitySearchService = Objects.requireNonNull(entitySearchService); _entitySearchService = Objects.requireNonNull(entitySearchService);
_searchRanker = Objects.requireNonNull(searchRanker); _searchRanker = Objects.requireNonNull(searchRanker);
_cachingEntitySearchService = Objects.requireNonNull(cachingEntitySearchService); _cachingEntitySearchService = Objects.requireNonNull(cachingEntitySearchService);
_entityDocCountCache = new EntityDocCountCache(entityRegistry, entitySearchService); _entityDocCountCache = new EntityDocCountCache(entityRegistry, entitySearchService, entityDocCountCacheConfiguration);
_maxAggregationValueCount = DEFAULT_MAX_AGGREGATION_VALUES; // TODO: Make this externally configurable _maxAggregationValueCount = DEFAULT_MAX_AGGREGATION_VALUES; // TODO: Make this externally configurable
} }

View File

@ -1,8 +1,10 @@
package com.linkedin.metadata.search.cache; package com.linkedin.metadata.search.cache;
import com.google.common.base.Suppliers; import com.google.common.base.Suppliers;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.EntitySearchService;
import com.linkedin.metadata.utils.ConcurrencyUtils;
import io.opentelemetry.extension.annotations.WithSpan; import io.opentelemetry.extension.annotations.WithSpan;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -17,17 +19,18 @@ public class EntityDocCountCache {
private final EntitySearchService _entitySearchService; private final EntitySearchService _entitySearchService;
private final Supplier<Map<String, Long>> entityDocCount; private final Supplier<Map<String, Long>> entityDocCount;
public EntityDocCountCache(EntityRegistry entityRegistry, EntitySearchService entitySearchService) { public EntityDocCountCache(EntityRegistry entityRegistry, EntitySearchService entitySearchService,
EntityDocCountCacheConfiguration config) {
_entityRegistry = entityRegistry; _entityRegistry = entityRegistry;
_entitySearchService = entitySearchService; _entitySearchService = entitySearchService;
entityDocCount = Suppliers.memoizeWithExpiration(this::fetchEntityDocCount, 1, TimeUnit.MINUTES); entityDocCount = Suppliers.memoizeWithExpiration(this::fetchEntityDocCount, config.getTtlSeconds(), TimeUnit.SECONDS);
} }
private Map<String, Long> fetchEntityDocCount() { private Map<String, Long> fetchEntityDocCount() {
return _entityRegistry.getEntitySpecs() return ConcurrencyUtils
.keySet() .transformAndCollectAsync(_entityRegistry.getEntitySpecs().keySet(),
.stream() Function.identity(),
.collect(Collectors.toMap(Function.identity(), _entitySearchService::docCount)); Collectors.toMap(Function.identity(), _entitySearchService::docCount));
} }
@WithSpan @WithSpan

View File

@ -3,6 +3,7 @@ package com.linkedin.metadata;
import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.EntityClient;
import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.client.JavaEntityClient;
import com.linkedin.metadata.config.ElasticSearchConfiguration; import com.linkedin.metadata.config.ElasticSearchConfiguration;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.SearchService;
@ -102,9 +103,11 @@ public class ESSampleDataFixture {
int batchSize = 100; int batchSize = 100;
SearchRanker<Double> ranker = new SimpleRanker(); SearchRanker<Double> ranker = new SimpleRanker();
CacheManager cacheManager = new ConcurrentMapCacheManager(); CacheManager cacheManager = new ConcurrentMapCacheManager();
EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration();
entityDocCountCacheConfiguration.setTtlSeconds(600L);
SearchService service = new SearchService( SearchService service = new SearchService(
new EntityDocCountCache(entityRegistry, entitySearchService), new EntityDocCountCache(entityRegistry, entitySearchService, entityDocCountCacheConfiguration),
new CachingEntitySearchService( new CachingEntitySearchService(
cacheManager, cacheManager,
entitySearchService, entitySearchService,
@ -122,7 +125,8 @@ public class ESSampleDataFixture {
batchSize, batchSize,
false false
), ),
ranker ranker,
entityDocCountCacheConfiguration
), ),
batchSize, batchSize,
false false

View File

@ -3,6 +3,7 @@ package com.linkedin.metadata;
import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.EntityClient;
import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.client.JavaEntityClient;
import com.linkedin.metadata.config.ElasticSearchConfiguration; import com.linkedin.metadata.config.ElasticSearchConfiguration;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO;
import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO;
@ -149,9 +150,11 @@ public class ESSearchLineageFixture {
int batchSize = 100; int batchSize = 100;
SearchRanker<Double> ranker = new SimpleRanker(); SearchRanker<Double> ranker = new SimpleRanker();
CacheManager cacheManager = new ConcurrentMapCacheManager(); CacheManager cacheManager = new ConcurrentMapCacheManager();
EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration();
entityDocCountCacheConfiguration.setTtlSeconds(600L);
SearchService service = new SearchService( SearchService service = new SearchService(
new EntityDocCountCache(entityRegistry, entitySearchService), new EntityDocCountCache(entityRegistry, entitySearchService, entityDocCountCacheConfiguration),
new CachingEntitySearchService( new CachingEntitySearchService(
cacheManager, cacheManager,
entitySearchService, entitySearchService,
@ -169,7 +172,8 @@ public class ESSearchLineageFixture {
batchSize, batchSize,
false false
), ),
ranker ranker,
entityDocCountCacheConfiguration
), ),
batchSize, batchSize,
false false

View File

@ -9,6 +9,7 @@ import com.linkedin.common.urn.Urn;
import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor;
import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.metadata.ESTestConfiguration;
import com.linkedin.metadata.TestEntityUtil; import com.linkedin.metadata.TestEntityUtil;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.EntityLineageResult;
import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.LineageDirection;
@ -91,13 +92,16 @@ public class LineageSearchServiceTest extends AbstractTestNGSpringContextTests {
private void resetService(boolean withCache) { private void resetService(boolean withCache) {
CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService(_cacheManager, _elasticSearchService, 100, true); CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService(_cacheManager, _elasticSearchService, 100, true);
EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration();
entityDocCountCacheConfiguration.setTtlSeconds(600L);
_lineageSearchService = new LineageSearchService( _lineageSearchService = new LineageSearchService(
new SearchService( new SearchService(
new EntityDocCountCache(_entityRegistry, _elasticSearchService), new EntityDocCountCache(_entityRegistry, _elasticSearchService, entityDocCountCacheConfiguration),
cachingEntitySearchService, cachingEntitySearchService,
new CachingAllEntitiesSearchAggregator( new CachingAllEntitiesSearchAggregator(
_cacheManager, _cacheManager,
new AllEntitiesSearchAggregator(_entityRegistry, _elasticSearchService, cachingEntitySearchService, new SimpleRanker()), new AllEntitiesSearchAggregator(_entityRegistry, _elasticSearchService, cachingEntitySearchService,
new SimpleRanker(), entityDocCountCacheConfiguration),
100, 100,
true), true),
new SimpleRanker()), new SimpleRanker()),

View File

@ -8,6 +8,7 @@ import com.linkedin.common.urn.TestEntityUrn;
import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArray;
import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.metadata.ESTestConfiguration;
import com.linkedin.metadata.config.EntityDocCountCacheConfiguration;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.SearchFlags;
@ -82,8 +83,11 @@ public class SearchServiceTest extends AbstractTestNGSpringContextTests {
_elasticSearchService, _elasticSearchService,
100, 100,
true); true);
EntityDocCountCacheConfiguration entityDocCountCacheConfiguration = new EntityDocCountCacheConfiguration();
entityDocCountCacheConfiguration.setTtlSeconds(600L);
_searchService = new SearchService( _searchService = new SearchService(
new EntityDocCountCache(_entityRegistry, _elasticSearchService), new EntityDocCountCache(_entityRegistry, _elasticSearchService, entityDocCountCacheConfiguration),
cachingEntitySearchService, cachingEntitySearchService,
new CachingAllEntitiesSearchAggregator( new CachingAllEntitiesSearchAggregator(
_cacheManager, _cacheManager,
@ -91,7 +95,7 @@ public class SearchServiceTest extends AbstractTestNGSpringContextTests {
_entityRegistry, _entityRegistry,
_elasticSearchService, _elasticSearchService,
cachingEntitySearchService, cachingEntitySearchService,
new SimpleRanker()), new SimpleRanker(), entityDocCountCacheConfiguration),
100, 100,
true), true),
new SimpleRanker()); new SimpleRanker());
@ -161,6 +165,9 @@ public class SearchServiceTest extends AbstractTestNGSpringContextTests {
assertEquals(searchResult.getEntities().get(0).getEntity(), urn2); assertEquals(searchResult.getEntities().get(0).getEntity(), urn2);
clearCache(); clearCache();
long docCount = _elasticSearchService.docCount(ENTITY_NAME);
assertEquals(docCount, 2L);
_elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn.toString());
_elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString()); _elasticSearchService.deleteDocument(ENTITY_NAME, urn2.toString());
syncAfterWrite(_bulkProcessor); syncAfterWrite(_bulkProcessor);

View File

@ -21,10 +21,10 @@ import org.springframework.context.annotation.Configuration;
@Configuration @Configuration
public class CacheConfig { public class CacheConfig {
@Value("${CACHE_TTL_SECONDS:600}") @Value("${cache.primary.ttlSeconds:600}")
private int cacheTtlSeconds; private int cacheTtlSeconds;
@Value("${CACHE_MAX_SIZE:10000}") @Value("${cache.primary.maxSize:10000}")
private int cacheMaxSize; private int cacheMaxSize;
@Value("${searchService.cache.hazelcast.serviceName:hazelcast-service}") @Value("${searchService.cache.hazelcast.serviceName:hazelcast-service}")

View File

@ -3,6 +3,7 @@ package com.linkedin.gms.factory.config;
import com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authentication.AuthenticationConfiguration;
import com.datahub.authorization.AuthorizationConfiguration; import com.datahub.authorization.AuthorizationConfiguration;
import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
import com.linkedin.metadata.config.CacheConfiguration;
import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.DataHubConfiguration;
import com.linkedin.metadata.config.ElasticSearchConfiguration; import com.linkedin.metadata.config.ElasticSearchConfiguration;
import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.IngestionConfiguration;
@ -69,4 +70,9 @@ public class ConfigurationProvider {
* System Update configurations * System Update configurations
*/ */
private SystemUpdateConfiguration systemUpdate; private SystemUpdateConfiguration systemUpdate;
/**
* Configuration for caching
*/
private CacheConfiguration cache;
} }

View File

@ -1,5 +1,6 @@
package com.linkedin.gms.factory.search; package com.linkedin.gms.factory.search;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.EntitySearchService;
@ -38,11 +39,12 @@ public class AllEntitiesSearchAggregatorFactory {
@Bean(name = "allEntitiesSearchAggregator") @Bean(name = "allEntitiesSearchAggregator")
@Primary @Primary
@Nonnull @Nonnull
protected AllEntitiesSearchAggregator getInstance() { protected AllEntitiesSearchAggregator getInstance(ConfigurationProvider configurationProvider) {
return new AllEntitiesSearchAggregator( return new AllEntitiesSearchAggregator(
entityRegistry, entityRegistry,
entitySearchService, entitySearchService,
cachingEntitySearchService, cachingEntitySearchService,
searchRanker); searchRanker,
configurationProvider.getCache().getHomepage().getEntityCounts());
} }
} }

View File

@ -1,5 +1,6 @@
package com.linkedin.gms.factory.search; package com.linkedin.gms.factory.search;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory;
import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.EntitySearchService;
@ -44,9 +45,10 @@ public class SearchServiceFactory {
@Bean(name = "searchService") @Bean(name = "searchService")
@Primary @Primary
@Nonnull @Nonnull
protected SearchService getInstance() { protected SearchService getInstance(ConfigurationProvider configurationProvider) {
return new SearchService( return new SearchService(
new EntityDocCountCache(entityRegistry, entitySearchService), new EntityDocCountCache(entityRegistry, entitySearchService, configurationProvider.getCache()
.getHomepage().getEntityCounts()),
cachingEntitySearchService, cachingEntitySearchService,
cachingAllEntitiesSearchAggregator, cachingAllEntitiesSearchAggregator,
searchRanker); searchRanker);

View File

@ -255,3 +255,11 @@ entityClient:
usageClient: usageClient:
retryInterval: ${USAGE_CLIENT_RETRY_INTERVAL:2} retryInterval: ${USAGE_CLIENT_RETRY_INTERVAL:2}
numRetries: ${USAGE_CLIENT_NUM_RETRIES:3} numRetries: ${USAGE_CLIENT_NUM_RETRIES:3}
cache:
primary:
ttlSeconds: ${CACHE_TTL_SECONDS:600}
maxSize: ${CACHE_MAX_SIZE:10000}
homepage:
entityCounts:
ttlSeconds: ${CACHE_ENTITY_COUNTS_TTL_SECONDS:600}

View File

@ -1,11 +1,13 @@
package com.linkedin.metadata.utils; package com.linkedin.metadata.utils;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collector;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@ -15,18 +17,24 @@ public class ConcurrencyUtils {
private ConcurrencyUtils() { private ConcurrencyUtils() {
} }
public static <O, T> List<T> transformAndCollectAsync(List<O> originalList, Function<O, T> transformer) {
return transformAndCollectAsync(originalList, transformer, Collectors.toList());
}
/** /**
* Transforms original list into the final list using the function transformer in an asynchronous fashion * Transforms original list into the final list using the function transformer in an asynchronous fashion
* i.e. each element transform is run as a separate CompleteableFuture and then joined at the end * i.e. each element transform is run as a separate CompleteableFuture and then joined at the end
*/ */
public static <O, T> List<T> transformAndCollectAsync(List<O> originalList, Function<O, T> transformer) { public static <O, T, OUTPUT> OUTPUT transformAndCollectAsync(Collection<O> originalCollection,
return originalList.stream() Function<O, T> transformer, Collector<T, ?, OUTPUT> collector) {
return originalCollection.stream()
.map(element -> CompletableFuture.supplyAsync(() -> transformer.apply(element))) .map(element -> CompletableFuture.supplyAsync(() -> transformer.apply(element)))
.collect(Collectors.collectingAndThen(Collectors.toList(), .collect(Collectors.collectingAndThen(Collectors.toList(),
completableFutureList -> completableFutureList.stream().map(CompletableFuture::join))) completableFutureList -> completableFutureList.stream().map(CompletableFuture::join)))
.collect(Collectors.toList()); .collect(collector);
} }
/** /**
* Transforms original list into the final list using the function transformer in an asynchronous fashion * Transforms original list into the final list using the function transformer in an asynchronous fashion
* with exceptions handled by the input exceptionHandler * with exceptions handled by the input exceptionHandler
@ -34,13 +42,23 @@ public class ConcurrencyUtils {
*/ */
public static <O, T> List<T> transformAndCollectAsync(List<O> originalList, Function<O, T> transformer, public static <O, T> List<T> transformAndCollectAsync(List<O> originalList, Function<O, T> transformer,
BiFunction<O, Throwable, ? extends T> exceptionHandler) { BiFunction<O, Throwable, ? extends T> exceptionHandler) {
return originalList.stream() return transformAndCollectAsync(originalList, transformer, exceptionHandler, Collectors.toList());
}
/**
* Transforms original list into the final list using the function transformer in an asynchronous fashion
* with exceptions handled by the input exceptionHandler
* i.e. each element transform is run as a separate CompleteableFuture and then joined at the end
*/
public static <O, T, OUTPUT> OUTPUT transformAndCollectAsync(Collection<O> originalCollection,
Function<O, T> transformer, BiFunction<O, Throwable, ? extends T> exceptionHandler, Collector<T, ?, OUTPUT> collector) {
return originalCollection.stream()
.map(element -> CompletableFuture.supplyAsync(() -> transformer.apply(element)) .map(element -> CompletableFuture.supplyAsync(() -> transformer.apply(element))
.exceptionally(e -> exceptionHandler.apply(element, e))) .exceptionally(e -> exceptionHandler.apply(element, e)))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.collectingAndThen(Collectors.toList(), .collect(Collectors.collectingAndThen(Collectors.toList(),
completableFutureList -> completableFutureList.stream().map(CompletableFuture::join))) completableFutureList -> completableFutureList.stream().map(CompletableFuture::join)))
.collect(Collectors.toList()); .collect(collector);
} }
/** /**