fix(elasticsearch): refactor idHashAlgo setting (#11193)

Author: david-leifker, 2024-08-16 14:41:44 -05:00, committed by GitHub
parent cb33c0fef7
commit edb9a87b84
40 changed files with 90 additions and 109 deletions
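
In brief: the ELASTIC_ID_HASH_ALGO environment variable is removed from the quickstart env files, compose overrides, and test scripts below. The hash algorithm instead flows through the code as an explicit setting: a ${elasticsearch.idHashAlgo} Spring property injected into the factories, a field on ElasticSearchConfiguration and IndexConvention, and a parameter on Edge.toDocId(...) and TimeseriesAspectTransformer.transform(...), replacing the scattered System.getenv("ELASTIC_ID_HASH_ALGO") lookups. (The property is presumably still seeded from the old variable via an application.yaml entry along the lines of idHashAlgo: ${ELASTIC_ID_HASH_ALGO:MD5}; that file is not part of the excerpt shown here.)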

View File

@@ -23,8 +23,6 @@ PE_CONSUMER_ENABLED=true
UI_INGESTION_ENABLED=true
ENTITY_SERVICE_ENABLE_RETENTION=true
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false

View File

@@ -27,8 +27,6 @@ MCE_CONSUMER_ENABLED=true
PE_CONSUMER_ENABLED=true
UI_INGESTION_ENABLED=true
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to enable Metadata Service Authentication
METADATA_SERVICE_AUTH_ENABLED=false

View File

@@ -13,8 +13,6 @@ ES_BULK_REFRESH_POLICY=WAIT_UNTIL
GRAPH_SERVICE_IMPL=elasticsearch
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false

View File

@@ -17,8 +17,6 @@ NEO4J_PASSWORD=datahub
GRAPH_SERVICE_IMPL=neo4j
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false

View File

@@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false
PE_CONSUMER_ENABLED=false
UI_INGESTION_ENABLED=false
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment
# METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1

View File

@@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false
PE_CONSUMER_ENABLED=false
UI_INGESTION_ENABLED=false
ELASTIC_ID_HASH_ALGO=MD5
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment
# METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1

View File

@@ -86,7 +86,6 @@ services:
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -86,7 +86,6 @@ services:
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -86,7 +86,6 @@ services:
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -19,7 +19,6 @@ services:
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
- GRAPH_SERVICE_IMPL=elasticsearch
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
- ELASTIC_ID_HASH_ALGO=MD5
hostname: datahub-mae-consumer
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head}
ports:
@@ -38,7 +37,6 @@ services:
- EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -26,7 +26,6 @@ services:
- NEO4J_PASSWORD=datahub
- GRAPH_SERVICE_IMPL=neo4j
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
- ELASTIC_ID_HASH_ALGO=MD5
hostname: datahub-mae-consumer
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head}
ports:
@@ -48,7 +47,6 @@ services:
- EBEAN_DATASOURCE_USERNAME=datahub
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -86,7 +86,6 @@ services:
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
- ELASTICSEARCH_PORT=9200
- ELASTIC_ID_HASH_ALGO=MD5
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
- ENTITY_SERVICE_ENABLE_RETENTION=true
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL

View File

@@ -13,6 +13,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
@@ -59,7 +60,7 @@ public class Edge {
null);
}
public String toDocId() {
public String toDocId(@Nonnull String idHashAlgo) {
StringBuilder rawDocId = new StringBuilder();
rawDocId
.append(getSource().toString())
@@ -72,9 +73,8 @@
}
try {
String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO");
byte[] bytesOfRawDocID = rawDocId.toString().getBytes(StandardCharsets.UTF_8);
MessageDigest md = MessageDigest.getInstance(hashAlgo);
MessageDigest md = MessageDigest.getInstance(idHashAlgo);
byte[] thedigest = md.digest(bytesOfRawDocID);
return Base64.getEncoder().encodeToString(thedigest);
} catch (NoSuchAlgorithmException e) {
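
Since the hunk above is truncated, here is a minimal self-contained sketch of the behavior toDocId now has: the caller supplies the algorithm name and the method hashes the concatenated edge fields. Class and method names in the sketch are illustrative; only the JDK calls mirror the diff.

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import java.util.Base64;

    final class DocIdSketch {
      // Same shape as Edge.toDocId(@Nonnull String idHashAlgo) after this change:
      // hash with the injected algorithm instead of System.getenv("ELASTIC_ID_HASH_ALGO").
      static String toDocId(String rawDocId, String idHashAlgo) {
        try {
          MessageDigest md = MessageDigest.getInstance(idHashAlgo); // e.g. "MD5" or "SHA-256"
          byte[] digest = md.digest(rawDocId.getBytes(StandardCharsets.UTF_8));
          return Base64.getEncoder().encodeToString(digest);
        } catch (NoSuchAlgorithmException e) {
          throw new IllegalArgumentException("Unsupported hash algorithm: " + idHashAlgo, e);
        }
      }
    }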

View File

@@ -130,7 +130,6 @@ test {
// override, testng controlling parallelization
// increasing >1 will merely run all tests extra times
maxParallelForks = 1
environment "ELASTIC_ID_HASH_ALGO", "MD5"
}
useTestNG() {
suites 'src/test/resources/testng.xml'

View File

@@ -64,6 +64,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
private final ESGraphWriteDAO _graphWriteDAO;
private final ESGraphQueryDAO _graphReadDAO;
private final ESIndexBuilder _indexBuilder;
private final String idHashAlgo;
public static final String INDEX_NAME = "graph_service_v1";
private static final Map<String, Object> EMPTY_HASH = new HashMap<>();
@@ -125,7 +126,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
@Override
public void addEdge(@Nonnull final Edge edge) {
String docId = edge.toDocId();
String docId = edge.toDocId(idHashAlgo);
String edgeDocument = toDocument(edge);
_graphWriteDAO.upsertDocument(docId, edgeDocument);
}
@@ -137,7 +138,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
@Override
public void removeEdge(@Nonnull final Edge edge) {
String docId = edge.toDocId();
String docId = edge.toDocId(idHashAlgo);
_graphWriteDAO.deleteDocument(docId);
}

View File

@@ -80,6 +80,7 @@ public class UpdateIndicesService implements SearchIndicesService {
private final SystemMetadataService _systemMetadataService;
private final SearchDocumentTransformer _searchDocumentTransformer;
private final EntityIndexBuilders _entityIndexBuilders;
@Nonnull private final String idHashAlgo;
@Value("${featureFlags.graphServiceDiffModeEnabled:true}")
private boolean _graphDiffMode;
@@ -117,13 +118,15 @@ public class UpdateIndicesService implements SearchIndicesService {
TimeseriesAspectService timeseriesAspectService,
SystemMetadataService systemMetadataService,
SearchDocumentTransformer searchDocumentTransformer,
EntityIndexBuilders entityIndexBuilders) {
EntityIndexBuilders entityIndexBuilders,
@Nonnull String idHashAlgo) {
_graphService = graphService;
_entitySearchService = entitySearchService;
_timeseriesAspectService = timeseriesAspectService;
_systemMetadataService = systemMetadataService;
_searchDocumentTransformer = searchDocumentTransformer;
_entityIndexBuilders = entityIndexBuilders;
this.idHashAlgo = idHashAlgo;
}
@Override
@@ -601,7 +604,9 @@
SystemMetadata systemMetadata) {
Map<String, JsonNode> documents;
try {
documents = TimeseriesAspectTransformer.transform(urn, aspect, aspectSpec, systemMetadata);
documents =
TimeseriesAspectTransformer.transform(
urn, aspect, aspectSpec, systemMetadata, idHashAlgo);
} catch (JsonProcessingException e) {
log.error("Failed to generate timeseries document from aspect: {}", e.toString());
return;

View File

@@ -54,7 +54,8 @@ public class TimeseriesAspectTransformer {
@Nonnull final Urn urn,
@Nonnull final RecordTemplate timeseriesAspect,
@Nonnull final AspectSpec aspectSpec,
@Nullable final SystemMetadata systemMetadata)
@Nullable final SystemMetadata systemMetadata,
@Nonnull final String idHashAlgo)
throws JsonProcessingException {
ObjectNode commonDocument = getCommonDocument(urn, timeseriesAspect, systemMetadata);
Map<String, JsonNode> finalDocuments = new HashMap<>();
@@ -74,7 +75,7 @@
final Map<TimeseriesFieldSpec, List<Object>> timeseriesFieldValueMap =
FieldExtractor.extractFields(timeseriesAspect, aspectSpec.getTimeseriesFieldSpecs());
timeseriesFieldValueMap.forEach((k, v) -> setTimeseriesField(document, k, v));
finalDocuments.put(getDocId(document, null), document);
finalDocuments.put(getDocId(document, null, idHashAlgo), document);
// Create new rows for the member collection fields.
final Map<TimeseriesFieldCollectionSpec, List<Object>> timeseriesFieldCollectionValueMap =
@@ -83,7 +84,7 @@
timeseriesFieldCollectionValueMap.forEach(
(key, values) ->
finalDocuments.putAll(
getTimeseriesFieldCollectionDocuments(key, values, commonDocument)));
getTimeseriesFieldCollectionDocuments(key, values, commonDocument, idHashAlgo)));
return finalDocuments;
}
@@ -216,12 +217,13 @@
private static Map<String, JsonNode> getTimeseriesFieldCollectionDocuments(
final TimeseriesFieldCollectionSpec fieldSpec,
final List<Object> values,
final ObjectNode commonDocument) {
final ObjectNode commonDocument,
@Nonnull final String idHashAlgo) {
return values.stream()
.map(value -> getTimeseriesFieldCollectionDocument(fieldSpec, value, commonDocument))
.collect(
Collectors.toMap(
keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst()),
keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst(), idHashAlgo),
Pair::getSecond));
}
@@ -257,9 +259,9 @@
finalDocument);
}
private static String getDocId(@Nonnull JsonNode document, String collectionId)
private static String getDocId(
@Nonnull JsonNode document, String collectionId, @Nonnull String idHashAlgo)
throws IllegalArgumentException {
String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO");
String docId = document.get(MappingsBuilder.TIMESTAMP_MILLIS_FIELD).toString();
JsonNode eventGranularity = document.get(MappingsBuilder.EVENT_GRANULARITY);
if (eventGranularity != null) {
@@ -278,9 +280,9 @@
docId += partitionSpec.toString();
}
if (hashAlgo.equalsIgnoreCase("SHA-256")) {
if (idHashAlgo.equalsIgnoreCase("SHA-256")) {
return DigestUtils.sha256Hex(docId);
} else if (hashAlgo.equalsIgnoreCase("MD5")) {
} else if (idHashAlgo.equalsIgnoreCase("MD5")) {
return DigestUtils.md5Hex(docId);
}
throw new IllegalArgumentException("Hash function not handled !");
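
A usage sketch under the new signature: the algorithm is passed explicitly, and per the branch above only "SHA-256" and "MD5" are handled (anything else reaches the IllegalArgumentException). Here urn, aspect, and aspectSpec stand for values a caller already holds.

    // Hypothetical call site; compare updateTimeseriesFields in UpdateIndicesService above.
    Map<String, JsonNode> documents =
        TimeseriesAspectTransformer.transform(urn, aspect, aspectSpec, systemMetadata, "MD5");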

View File

@@ -62,7 +62,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase {
@Nonnull
protected abstract ESIndexBuilder getIndexBuilder();
private final IndexConvention _indexConvention = IndexConventionImpl.NO_PREFIX;
private final IndexConvention _indexConvention = IndexConventionImpl.noPrefix("MD5");
private final String _indexName = _indexConvention.getIndexName(INDEX_NAME);
private ElasticSearchGraphService _client;
@@ -108,7 +108,8 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase {
_indexConvention,
writeDAO,
readDAO,
getIndexBuilder());
getIndexBuilder(),
"MD5");
}
@Override

View File

@@ -122,7 +122,7 @@ public abstract class LineageServiceTestBase extends AbstractTestNGSpringContext
operationContext =
TestOperationContexts.systemContextNoSearchAuthorization(
new SnapshotEntityRegistry(new Snapshot()),
new IndexConventionImpl("lineage_search_service_test"))
new IndexConventionImpl("lineage_search_service_test", "MD5"))
.asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH);
settingsBuilder = new SettingsBuilder(null);
elasticSearchService = buildEntitySearchService();

View File

@@ -79,7 +79,7 @@ public abstract class SearchServiceTestBase extends AbstractTestNGSpringContextT
operationContext =
TestOperationContexts.systemContextNoSearchAuthorization(
new SnapshotEntityRegistry(new Snapshot()),
new IndexConventionImpl("search_service_test"))
new IndexConventionImpl("search_service_test", "MD5"))
.asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH);
settingsBuilder = new SettingsBuilder(null);

View File

@@ -62,7 +62,8 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest
public void setup() {
opContext =
TestOperationContexts.systemContextNoSearchAuthorization(
new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test"));
new SnapshotEntityRegistry(new Snapshot()),
new IndexConventionImpl("es_service_test", "MD5"));
settingsBuilder = new SettingsBuilder(null);
elasticSearchService = buildService();
elasticSearchService.reindexAll(Collections.emptySet());

View File

@@ -45,7 +45,7 @@ public class BrowseDAOTest extends AbstractTestNGSpringContextTests {
mockClient = mock(RestHighLevelClient.class);
opContext =
TestOperationContexts.systemContextNoSearchAuthorization(
new IndexConventionImpl("es_browse_dao_test"));
new IndexConventionImpl("es_browse_dao_test", "MD5"));
browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration);
}

View File

@@ -32,7 +32,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring
protected abstract ESIndexBuilder getIndexBuilder();
private final IndexConvention _indexConvention =
new IndexConventionImpl("es_system_metadata_service_test");
new IndexConventionImpl("es_system_metadata_service_test", "MD5");
private ElasticSearchSystemMetadataService _client;

View File

@@ -126,7 +126,7 @@ public abstract class TimeseriesAspectServiceTestBase extends AbstractTestNGSpri
opContext =
TestOperationContexts.systemContextNoSearchAuthorization(
entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test"));
entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test", "MD5"));
elasticSearchTimeseriesAspectService = buildService();
elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet());
@@ -152,7 +152,7 @@ public abstract class TimeseriesAspectServiceTestBase extends AbstractTestNGSpri
private void upsertDocument(TestEntityProfile dp, Urn urn) throws JsonProcessingException {
Map<String, JsonNode> documents =
TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null);
TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null, "MD5");
assertEquals(documents.size(), 3);
documents.forEach(
(key, value) ->

View File

@@ -86,12 +86,12 @@ public class SampleDataFixtureConfiguration {
@Bean(name = "sampleDataIndexConvention")
protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) {
return new IndexConventionImpl(prefix);
return new IndexConventionImpl(prefix, "MD5");
}
@Bean(name = "longTailIndexConvention")
protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) {
return new IndexConventionImpl(prefix);
return new IndexConventionImpl(prefix, "MD5");
}
@Bean(name = "sampleDataFixtureName")

View File

@@ -71,7 +71,7 @@ public class SearchLineageFixtureConfiguration {
@Bean(name = "searchLineageIndexConvention")
protected IndexConvention indexConvention(@Qualifier("searchLineagePrefix") String prefix) {
return new IndexConventionImpl(prefix);
return new IndexConventionImpl(prefix, "MD5");
}
@Bean(name = "searchLineageFixtureName")
@@ -173,7 +173,8 @@ public class SearchLineageFixtureConfiguration {
new ESGraphWriteDAO(indexConvention, bulkProcessor, 1),
new ESGraphQueryDAO(
searchClient, lineageRegistry, indexConvention, getGraphQueryConfiguration()),
indexBuilder);
indexBuilder,
indexConvention.getIdHashAlgo());
graphService.reindexAll(Collections.emptySet());
return graphService;
}

View File

@@ -125,7 +125,8 @@ public class UpdateIndicesHookTest {
mockTimeseriesAspectService,
mockSystemMetadataService,
searchDocumentTransformer,
mockEntityIndexBuilders);
mockEntityIndexBuilders,
"MD5");
OperationContext systemOperationContext =
TestOperationContexts.systemContextNoSearchAuthorization();
@@ -235,7 +236,8 @@ public class UpdateIndicesHookTest {
mockTimeseriesAspectService,
mockSystemMetadataService,
searchDocumentTransformer,
mockEntityIndexBuilders);
mockEntityIndexBuilders,
"MD5");
updateIndicesHook = new UpdateIndicesHook(updateIndicesService, true, false);
updateIndicesHook.init(

View File

@@ -21,7 +21,7 @@ import lombok.Getter;
public class SearchContext implements ContextInterface {
public static SearchContext EMPTY =
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("")).build();
public static SearchContext withFlagDefaults(
@Nonnull SearchContext searchContext,

View File

@@ -191,7 +191,7 @@ public class TestOperationContexts {
IndexConvention indexConvention =
Optional.ofNullable(indexConventionSupplier)
.map(Supplier::get)
.orElse(IndexConventionImpl.NO_PREFIX);
.orElse(IndexConventionImpl.noPrefix("MD5"));
ServicesRegistryContext servicesRegistryContext =
Optional.ofNullable(servicesRegistrySupplier).orElse(() -> null).get();

View File

@@ -12,26 +12,26 @@ public class SearchContextTest {
@Test
public void searchContextId() {
SearchContext testNoFlags =
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build();
assertEquals(
testNoFlags.getCacheKeyComponent(),
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.build()
.getCacheKeyComponent(),
"Expected consistent context ids across instances");
SearchContext testWithFlags =
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.searchFlags(new SearchFlags().setFulltext(true))
.build();
assertEquals(
testWithFlags.getCacheKeyComponent(),
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.searchFlags(new SearchFlags().setFulltext(true))
.build()
.getCacheKeyComponent(),
@@ -44,7 +44,7 @@ public class SearchContextTest {
assertNotEquals(
testWithFlags.getCacheKeyComponent(),
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true))
.build()
.getCacheKeyComponent(),
@@ -53,7 +53,7 @@
assertNotEquals(
testNoFlags.getCacheKeyComponent(),
SearchContext.builder()
.indexConvention(new IndexConventionImpl("Some Prefix"))
.indexConvention(new IndexConventionImpl("Some Prefix", "MD5"))
.searchFlags(null)
.build()
.getCacheKeyComponent(),
@@ -61,7 +61,7 @@
assertNotEquals(
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.searchFlags(
new SearchFlags()
.setFulltext(false)
@@ -70,7 +70,7 @@
.build()
.getCacheKeyComponent(),
SearchContext.builder()
.indexConvention(IndexConventionImpl.NO_PREFIX)
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
.searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true))
.build()
.getCacheKeyComponent(),
@@ -80,7 +80,7 @@
@Test
public void testImmutableSearchFlags() {
SearchContext initial =
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build();
assertEquals(initial.getSearchFlags(), new SearchFlags().setSkipCache(false));
SearchContext mutated = initial.withFlagDefaults(flags -> flags.setSkipCache(true));

View File

@@ -8,4 +8,5 @@ public class ElasticSearchConfiguration {
private BuildIndicesConfiguration buildIndices;
public String implementation;
private SearchConfiguration search;
private String idHashAlgo;
}
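
Assuming the class carries Lombok @Data or @Getter (the annotation sits outside this hunk), the new field becomes readable from the configuration provider; a one-line sketch with the accessor name assumed:

    // e.g. inside a factory that already holds a ConfigurationProvider:
    String idHashAlgo = configurationProvider.getElasticSearch().getIdHashAlgo();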

View File

@@ -11,6 +11,7 @@ import com.linkedin.metadata.models.registry.LineageRegistry;
import javax.annotation.Nonnull;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
@@ -30,7 +31,8 @@ public class ElasticSearchGraphServiceFactory {
@Bean(name = "elasticSearchGraphService")
@Nonnull
protected ElasticSearchGraphService getInstance() {
protected ElasticSearchGraphService getInstance(
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry);
return new ElasticSearchGraphService(
lineageRegistry,
@@ -45,6 +47,7 @@
lineageRegistry,
components.getIndexConvention(),
configurationProvider.getElasticSearch().getSearch().getGraph()),
components.getIndexBuilder());
components.getIndexBuilder(),
idHashAlgo);
}
}

View File

@@ -19,7 +19,8 @@ public class IndexConventionFactory {
private String indexPrefix;
@Bean(name = INDEX_CONVENTION_BEAN)
protected IndexConvention createInstance() {
return new IndexConventionImpl(indexPrefix);
protected IndexConvention createInstance(
@Value("${elasticsearch.idHashAlgo}") final String isHashAlgo) {
return new IndexConventionImpl(indexPrefix, isHashAlgo);
}
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
import com.linkedin.metadata.service.UpdateIndicesService;
import com.linkedin.metadata.systemmetadata.SystemMetadataService;
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -30,7 +31,8 @@ public class UpdateIndicesServiceFactory {
TimeseriesAspectService timeseriesAspectService,
SystemMetadataService systemMetadataService,
SearchDocumentTransformer searchDocumentTransformer,
EntityIndexBuilders entityIndexBuilders) {
EntityIndexBuilders entityIndexBuilders,
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
return new UpdateIndicesService(
graphService,
@@ -38,7 +40,8 @@
timeseriesAspectService,
systemMetadataService,
searchDocumentTransformer,
entityIndexBuilders);
entityIndexBuilders,
idHashAlgo);
}
@Bean
@@ -50,7 +53,8 @@
final SystemMetadataService systemMetadataService,
final SearchDocumentTransformer searchDocumentTransformer,
final EntityIndexBuilders entityIndexBuilders,
final EntityService<?> entityService) {
final EntityService<?> entityService,
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
UpdateIndicesService updateIndicesService =
new UpdateIndicesService(
@@ -59,7 +63,8 @@
timeseriesAspectService,
systemMetadataService,
searchDocumentTransformer,
entityIndexBuilders);
entityIndexBuilders,
idHashAlgo);
entityService.setUpdateIndicesService(updateIndicesService);
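
The wiring pattern shared by the factories above, condensed into a self-contained sketch: the value arrives through Spring property injection at bean-creation time instead of a System.getenv call at the point of use. Bean and class names here are illustrative.

    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;

    @Configuration
    class IdHashAlgoWiringSketch {
      @Bean
      String idHashAlgo(@Value("${elasticsearch.idHashAlgo}") String idHashAlgo) {
        // Resolved once from configuration, then handed to constructors,
        // e.g. new UpdateIndicesService(..., idHashAlgo).
        return idHashAlgo;
      }
    }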

View File

@@ -2,25 +2,20 @@ package com.linkedin.metadata.resources.usage;
import static com.datahub.authorization.AuthUtil.isAPIAuthorized;
import static com.datahub.authorization.AuthUtil.isAPIAuthorizedEntityUrns;
import static com.linkedin.metadata.Constants.*;
import static com.linkedin.metadata.authorization.ApiOperation.UPDATE;
import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ASPECT_NAME;
import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ENTITY_NAME;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.datahub.authentication.Authentication;
import com.datahub.authentication.AuthenticationContext;
import com.datahub.authorization.EntitySpec;
import com.datahub.plugins.auth.authorization.Authorizer;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.StreamReadConstraints;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linkedin.common.WindowDuration;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.template.StringArray;
import com.linkedin.dataset.DatasetFieldUsageCounts;
import com.linkedin.dataset.DatasetFieldUsageCountsArray;
import com.linkedin.dataset.DatasetUsageStatistics;
@@ -29,17 +24,10 @@ import com.linkedin.dataset.DatasetUserUsageCountsArray;
import com.linkedin.metadata.authorization.PoliciesConfig;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
import com.linkedin.metadata.query.filter.Criterion;
import com.linkedin.metadata.query.filter.CriterionArray;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.restli.RestliUtil;
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil;
import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer;
import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.parseq.Task;
import com.linkedin.restli.common.HttpStatus;
import com.linkedin.restli.server.RestLiServiceException;
@@ -47,35 +35,20 @@ import com.linkedin.restli.server.annotations.Action;
import com.linkedin.restli.server.annotations.ActionParam;
import com.linkedin.restli.server.annotations.RestLiSimpleResource;
import com.linkedin.restli.server.resources.SimpleResourceTemplate;
import com.linkedin.timeseries.AggregationSpec;
import com.linkedin.timeseries.AggregationType;
import com.linkedin.timeseries.CalendarInterval;
import com.linkedin.timeseries.GenericTable;
import com.linkedin.timeseries.GroupingBucket;
import com.linkedin.timeseries.GroupingBucketType;
import com.linkedin.timeseries.TimeWindowSize;
import com.linkedin.usage.FieldUsageCounts;
import com.linkedin.usage.FieldUsageCountsArray;
import com.linkedin.usage.UsageAggregation;
import com.linkedin.usage.UsageAggregationArray;
import com.linkedin.usage.UsageAggregationMetrics;
import com.linkedin.usage.UsageQueryResult;
import com.linkedin.usage.UsageQueryResultAggregations;
import com.linkedin.usage.UsageTimeRange;
import com.linkedin.usage.UserUsageCounts;
import com.linkedin.usage.UserUsageCountsArray;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RequestContext;
import io.opentelemetry.extension.annotations.WithSpan;
import java.net.URISyntaxException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import javax.inject.Inject;
import javax.inject.Named;
@@ -255,7 +228,8 @@ public class UsageStats extends SimpleResourceTemplate<UsageAggregation> {
try {
documents =
TimeseriesAspectTransformer.transform(
bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null);
bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null,
systemOperationContext.getSearchContext().getIndexConvention().getIdHashAlgo());
} catch (JsonProcessingException e) {
log.error("Failed to generate timeseries document from aspect: {}", e.toString());
return;

View File

@@ -47,4 +47,7 @@
* if one cannot be extracted
*/
Optional<Pair<String, String>> getEntityAndAspectName(String timeseriesAspectIndexName);
@Nonnull
String getIdHashAlgo();
}

View File

@@ -8,25 +8,30 @@ import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.Getter;
import org.apache.commons.lang3.StringUtils;
// Default implementation of search index naming convention
public class IndexConventionImpl implements IndexConvention {
public static final IndexConvention NO_PREFIX = new IndexConventionImpl(null);
public static IndexConvention noPrefix(@Nonnull String idHashAlgo) {
return new IndexConventionImpl(null, idHashAlgo);
}
// Map from Entity name -> Index name
private final Map<String, String> indexNameMapping = new ConcurrentHashMap<>();
private final Optional<String> _prefix;
private final String _getAllEntityIndicesPattern;
private final String _getAllTimeseriesIndicesPattern;
@Getter private final String idHashAlgo;
private static final String ENTITY_INDEX_VERSION = "v2";
private static final String ENTITY_INDEX_SUFFIX = "index";
private static final String TIMESERIES_INDEX_VERSION = "v1";
private static final String TIMESERIES_ENTITY_INDEX_SUFFIX = "aspect";
public IndexConventionImpl(@Nullable String prefix) {
public IndexConventionImpl(@Nullable String prefix, String idHashAlgo) {
_prefix = StringUtils.isEmpty(prefix) ? Optional.empty() : Optional.of(prefix);
this.idHashAlgo = idHashAlgo;
_getAllEntityIndicesPattern =
_prefix.map(p -> p + "_").orElse("")
+ "*"

View File

@@ -10,7 +10,7 @@ public class IndexConventionImplTest {
@Test
public void testIndexConventionNoPrefix() {
IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX;
IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5");
String entityName = "dataset";
String expectedIndexName = "datasetindex_v2";
assertEquals(indexConventionNoPrefix.getEntityIndexName(entityName), expectedIndexName);
@@ -25,7 +25,7 @@
@Test
public void testIndexConventionPrefix() {
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix");
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5");
String entityName = "dataset";
String expectedIndexName = "prefix_datasetindex_v2";
assertEquals(indexConventionPrefix.getEntityIndexName(entityName), expectedIndexName);
@@ -42,7 +42,7 @@
@Test
public void testTimeseriesIndexConventionNoPrefix() {
IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX;
IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5");
String entityName = "dataset";
String aspectName = "datasetusagestatistics";
String expectedIndexName = "dataset_datasetusagestatisticsaspect_v1";
@@ -64,7 +64,7 @@
@Test
public void testTimeseriesIndexConventionPrefix() {
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix");
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5");
String entityName = "dataset";
String aspectName = "datasetusagestatistics";
String expectedIndexName = "prefix_dataset_datasetusagestatisticsaspect_v1";

View File

@@ -16,8 +16,6 @@ DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}"
XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}"
ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}"
USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}"
ELASTIC_ID_HASH_ALGO="${ELASTIC_ID_HASH_ALGO:=MD5}"
DATAHUB_TELEMETRY_ENABLED=false \
DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \

View File

@@ -1,3 +1,2 @@
export DATAHUB_KAFKA_SCHEMA_REGISTRY_URL=http://localhost:8080/schema-registry/api
export DATAHUB_GMS_URL=http://localhost:8080
export ELASTIC_ID_HASH_ALGO="MD5"
export DATAHUB_GMS_URL=http://localhost:8080