mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
fix(elasticsearch): refactor idHashAlgo setting (#11193)
This commit is contained in:
parent
cb33c0fef7
commit
edb9a87b84
@ -23,8 +23,6 @@ PE_CONSUMER_ENABLED=true
|
||||
UI_INGESTION_ENABLED=true
|
||||
ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to disable persistence of client-side analytics events
|
||||
# DATAHUB_ANALYTICS_ENABLED=false
|
||||
|
||||
|
2
docker/datahub-gms/env/docker.env
vendored
2
docker/datahub-gms/env/docker.env
vendored
@ -27,8 +27,6 @@ MCE_CONSUMER_ENABLED=true
|
||||
PE_CONSUMER_ENABLED=true
|
||||
UI_INGESTION_ENABLED=true
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to enable Metadata Service Authentication
|
||||
METADATA_SERVICE_AUTH_ENABLED=false
|
||||
|
||||
|
@ -13,8 +13,6 @@ ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
GRAPH_SERVICE_IMPL=elasticsearch
|
||||
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to disable persistence of client-side analytics events
|
||||
# DATAHUB_ANALYTICS_ENABLED=false
|
||||
|
||||
|
2
docker/datahub-mae-consumer/env/docker.env
vendored
2
docker/datahub-mae-consumer/env/docker.env
vendored
@ -17,8 +17,6 @@ NEO4J_PASSWORD=datahub
|
||||
GRAPH_SERVICE_IMPL=neo4j
|
||||
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to disable persistence of client-side analytics events
|
||||
# DATAHUB_ANALYTICS_ENABLED=false
|
||||
|
||||
|
@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false
|
||||
PE_CONSUMER_ENABLED=false
|
||||
UI_INGESTION_ENABLED=false
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to configure kafka topic names
|
||||
# Make sure these names are consistent across the whole deployment
|
||||
# METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1
|
||||
|
2
docker/datahub-mce-consumer/env/docker.env
vendored
2
docker/datahub-mce-consumer/env/docker.env
vendored
@ -24,8 +24,6 @@ MAE_CONSUMER_ENABLED=false
|
||||
PE_CONSUMER_ENABLED=false
|
||||
UI_INGESTION_ENABLED=false
|
||||
|
||||
ELASTIC_ID_HASH_ALGO=MD5
|
||||
|
||||
# Uncomment to configure kafka topic names
|
||||
# Make sure these names are consistent across the whole deployment
|
||||
# METADATA_CHANGE_PROPOSAL_TOPIC_NAME=MetadataChangeProposal_v1
|
||||
|
@ -86,7 +86,6 @@ services:
|
||||
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
|
||||
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -86,7 +86,6 @@ services:
|
||||
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
|
||||
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -86,7 +86,6 @@ services:
|
||||
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
|
||||
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -19,7 +19,6 @@ services:
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
- GRAPH_SERVICE_IMPL=elasticsearch
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
hostname: datahub-mae-consumer
|
||||
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head}
|
||||
ports:
|
||||
@ -38,7 +37,6 @@ services:
|
||||
- EBEAN_DATASOURCE_USERNAME=datahub
|
||||
- ELASTICSEARCH_HOST=elasticsearch
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -26,7 +26,6 @@ services:
|
||||
- NEO4J_PASSWORD=datahub
|
||||
- GRAPH_SERVICE_IMPL=neo4j
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
hostname: datahub-mae-consumer
|
||||
image: ${DATAHUB_MAE_CONSUMER_IMAGE:-acryldata/datahub-mae-consumer}:${DATAHUB_VERSION:-head}
|
||||
ports:
|
||||
@ -48,7 +47,6 @@ services:
|
||||
- EBEAN_DATASOURCE_USERNAME=datahub
|
||||
- ELASTICSEARCH_HOST=elasticsearch
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mce-consumer/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -86,7 +86,6 @@ services:
|
||||
- ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
|
||||
- ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
|
||||
- ELASTICSEARCH_PORT=9200
|
||||
- ELASTIC_ID_HASH_ALGO=MD5
|
||||
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
|
||||
- ENTITY_SERVICE_ENABLE_RETENTION=true
|
||||
- ES_BULK_REFRESH_POLICY=WAIT_UNTIL
|
||||
|
@ -13,6 +13,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import javax.annotation.Nonnull;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
@ -59,7 +60,7 @@ public class Edge {
|
||||
null);
|
||||
}
|
||||
|
||||
public String toDocId() {
|
||||
public String toDocId(@Nonnull String idHashAlgo) {
|
||||
StringBuilder rawDocId = new StringBuilder();
|
||||
rawDocId
|
||||
.append(getSource().toString())
|
||||
@ -72,9 +73,8 @@ public class Edge {
|
||||
}
|
||||
|
||||
try {
|
||||
String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO");
|
||||
byte[] bytesOfRawDocID = rawDocId.toString().getBytes(StandardCharsets.UTF_8);
|
||||
MessageDigest md = MessageDigest.getInstance(hashAlgo);
|
||||
MessageDigest md = MessageDigest.getInstance(idHashAlgo);
|
||||
byte[] thedigest = md.digest(bytesOfRawDocID);
|
||||
return Base64.getEncoder().encodeToString(thedigest);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
|
@ -130,7 +130,6 @@ test {
|
||||
// override, testng controlling parallelization
|
||||
// increasing >1 will merely run all tests extra times
|
||||
maxParallelForks = 1
|
||||
environment "ELASTIC_ID_HASH_ALGO", "MD5"
|
||||
}
|
||||
useTestNG() {
|
||||
suites 'src/test/resources/testng.xml'
|
||||
|
@ -64,6 +64,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
|
||||
private final ESGraphWriteDAO _graphWriteDAO;
|
||||
private final ESGraphQueryDAO _graphReadDAO;
|
||||
private final ESIndexBuilder _indexBuilder;
|
||||
private final String idHashAlgo;
|
||||
public static final String INDEX_NAME = "graph_service_v1";
|
||||
private static final Map<String, Object> EMPTY_HASH = new HashMap<>();
|
||||
|
||||
@ -125,7 +126,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
|
||||
|
||||
@Override
|
||||
public void addEdge(@Nonnull final Edge edge) {
|
||||
String docId = edge.toDocId();
|
||||
String docId = edge.toDocId(idHashAlgo);
|
||||
String edgeDocument = toDocument(edge);
|
||||
_graphWriteDAO.upsertDocument(docId, edgeDocument);
|
||||
}
|
||||
@ -137,7 +138,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd
|
||||
|
||||
@Override
|
||||
public void removeEdge(@Nonnull final Edge edge) {
|
||||
String docId = edge.toDocId();
|
||||
String docId = edge.toDocId(idHashAlgo);
|
||||
_graphWriteDAO.deleteDocument(docId);
|
||||
}
|
||||
|
||||
|
@ -80,6 +80,7 @@ public class UpdateIndicesService implements SearchIndicesService {
|
||||
private final SystemMetadataService _systemMetadataService;
|
||||
private final SearchDocumentTransformer _searchDocumentTransformer;
|
||||
private final EntityIndexBuilders _entityIndexBuilders;
|
||||
@Nonnull private final String idHashAlgo;
|
||||
|
||||
@Value("${featureFlags.graphServiceDiffModeEnabled:true}")
|
||||
private boolean _graphDiffMode;
|
||||
@ -117,13 +118,15 @@ public class UpdateIndicesService implements SearchIndicesService {
|
||||
TimeseriesAspectService timeseriesAspectService,
|
||||
SystemMetadataService systemMetadataService,
|
||||
SearchDocumentTransformer searchDocumentTransformer,
|
||||
EntityIndexBuilders entityIndexBuilders) {
|
||||
EntityIndexBuilders entityIndexBuilders,
|
||||
@Nonnull String idHashAlgo) {
|
||||
_graphService = graphService;
|
||||
_entitySearchService = entitySearchService;
|
||||
_timeseriesAspectService = timeseriesAspectService;
|
||||
_systemMetadataService = systemMetadataService;
|
||||
_searchDocumentTransformer = searchDocumentTransformer;
|
||||
_entityIndexBuilders = entityIndexBuilders;
|
||||
this.idHashAlgo = idHashAlgo;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -601,7 +604,9 @@ public class UpdateIndicesService implements SearchIndicesService {
|
||||
SystemMetadata systemMetadata) {
|
||||
Map<String, JsonNode> documents;
|
||||
try {
|
||||
documents = TimeseriesAspectTransformer.transform(urn, aspect, aspectSpec, systemMetadata);
|
||||
documents =
|
||||
TimeseriesAspectTransformer.transform(
|
||||
urn, aspect, aspectSpec, systemMetadata, idHashAlgo);
|
||||
} catch (JsonProcessingException e) {
|
||||
log.error("Failed to generate timeseries document from aspect: {}", e.toString());
|
||||
return;
|
||||
|
@ -54,7 +54,8 @@ public class TimeseriesAspectTransformer {
|
||||
@Nonnull final Urn urn,
|
||||
@Nonnull final RecordTemplate timeseriesAspect,
|
||||
@Nonnull final AspectSpec aspectSpec,
|
||||
@Nullable final SystemMetadata systemMetadata)
|
||||
@Nullable final SystemMetadata systemMetadata,
|
||||
@Nonnull final String idHashAlgo)
|
||||
throws JsonProcessingException {
|
||||
ObjectNode commonDocument = getCommonDocument(urn, timeseriesAspect, systemMetadata);
|
||||
Map<String, JsonNode> finalDocuments = new HashMap<>();
|
||||
@ -74,7 +75,7 @@ public class TimeseriesAspectTransformer {
|
||||
final Map<TimeseriesFieldSpec, List<Object>> timeseriesFieldValueMap =
|
||||
FieldExtractor.extractFields(timeseriesAspect, aspectSpec.getTimeseriesFieldSpecs());
|
||||
timeseriesFieldValueMap.forEach((k, v) -> setTimeseriesField(document, k, v));
|
||||
finalDocuments.put(getDocId(document, null), document);
|
||||
finalDocuments.put(getDocId(document, null, idHashAlgo), document);
|
||||
|
||||
// Create new rows for the member collection fields.
|
||||
final Map<TimeseriesFieldCollectionSpec, List<Object>> timeseriesFieldCollectionValueMap =
|
||||
@ -83,7 +84,7 @@ public class TimeseriesAspectTransformer {
|
||||
timeseriesFieldCollectionValueMap.forEach(
|
||||
(key, values) ->
|
||||
finalDocuments.putAll(
|
||||
getTimeseriesFieldCollectionDocuments(key, values, commonDocument)));
|
||||
getTimeseriesFieldCollectionDocuments(key, values, commonDocument, idHashAlgo)));
|
||||
return finalDocuments;
|
||||
}
|
||||
|
||||
@ -216,12 +217,13 @@ public class TimeseriesAspectTransformer {
|
||||
private static Map<String, JsonNode> getTimeseriesFieldCollectionDocuments(
|
||||
final TimeseriesFieldCollectionSpec fieldSpec,
|
||||
final List<Object> values,
|
||||
final ObjectNode commonDocument) {
|
||||
final ObjectNode commonDocument,
|
||||
@Nonnull final String idHashAlgo) {
|
||||
return values.stream()
|
||||
.map(value -> getTimeseriesFieldCollectionDocument(fieldSpec, value, commonDocument))
|
||||
.collect(
|
||||
Collectors.toMap(
|
||||
keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst()),
|
||||
keyDocPair -> getDocId(keyDocPair.getSecond(), keyDocPair.getFirst(), idHashAlgo),
|
||||
Pair::getSecond));
|
||||
}
|
||||
|
||||
@ -257,9 +259,9 @@ public class TimeseriesAspectTransformer {
|
||||
finalDocument);
|
||||
}
|
||||
|
||||
private static String getDocId(@Nonnull JsonNode document, String collectionId)
|
||||
private static String getDocId(
|
||||
@Nonnull JsonNode document, String collectionId, @Nonnull String idHashAlgo)
|
||||
throws IllegalArgumentException {
|
||||
String hashAlgo = System.getenv("ELASTIC_ID_HASH_ALGO");
|
||||
String docId = document.get(MappingsBuilder.TIMESTAMP_MILLIS_FIELD).toString();
|
||||
JsonNode eventGranularity = document.get(MappingsBuilder.EVENT_GRANULARITY);
|
||||
if (eventGranularity != null) {
|
||||
@ -278,9 +280,9 @@ public class TimeseriesAspectTransformer {
|
||||
docId += partitionSpec.toString();
|
||||
}
|
||||
|
||||
if (hashAlgo.equalsIgnoreCase("SHA-256")) {
|
||||
if (idHashAlgo.equalsIgnoreCase("SHA-256")) {
|
||||
return DigestUtils.sha256Hex(docId);
|
||||
} else if (hashAlgo.equalsIgnoreCase("MD5")) {
|
||||
} else if (idHashAlgo.equalsIgnoreCase("MD5")) {
|
||||
return DigestUtils.md5Hex(docId);
|
||||
}
|
||||
throw new IllegalArgumentException("Hash function not handled !");
|
||||
|
@ -62,7 +62,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase {
|
||||
@Nonnull
|
||||
protected abstract ESIndexBuilder getIndexBuilder();
|
||||
|
||||
private final IndexConvention _indexConvention = IndexConventionImpl.NO_PREFIX;
|
||||
private final IndexConvention _indexConvention = IndexConventionImpl.noPrefix("MD5");
|
||||
private final String _indexName = _indexConvention.getIndexName(INDEX_NAME);
|
||||
private ElasticSearchGraphService _client;
|
||||
|
||||
@ -108,7 +108,8 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase {
|
||||
_indexConvention,
|
||||
writeDAO,
|
||||
readDAO,
|
||||
getIndexBuilder());
|
||||
getIndexBuilder(),
|
||||
"MD5");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -122,7 +122,7 @@ public abstract class LineageServiceTestBase extends AbstractTestNGSpringContext
|
||||
operationContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization(
|
||||
new SnapshotEntityRegistry(new Snapshot()),
|
||||
new IndexConventionImpl("lineage_search_service_test"))
|
||||
new IndexConventionImpl("lineage_search_service_test", "MD5"))
|
||||
.asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH);
|
||||
settingsBuilder = new SettingsBuilder(null);
|
||||
elasticSearchService = buildEntitySearchService();
|
||||
|
@ -79,7 +79,7 @@ public abstract class SearchServiceTestBase extends AbstractTestNGSpringContextT
|
||||
operationContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization(
|
||||
new SnapshotEntityRegistry(new Snapshot()),
|
||||
new IndexConventionImpl("search_service_test"))
|
||||
new IndexConventionImpl("search_service_test", "MD5"))
|
||||
.asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH);
|
||||
|
||||
settingsBuilder = new SettingsBuilder(null);
|
||||
|
@ -62,7 +62,8 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTest
|
||||
public void setup() {
|
||||
opContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization(
|
||||
new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test"));
|
||||
new SnapshotEntityRegistry(new Snapshot()),
|
||||
new IndexConventionImpl("es_service_test", "MD5"));
|
||||
settingsBuilder = new SettingsBuilder(null);
|
||||
elasticSearchService = buildService();
|
||||
elasticSearchService.reindexAll(Collections.emptySet());
|
||||
|
@ -45,7 +45,7 @@ public class BrowseDAOTest extends AbstractTestNGSpringContextTests {
|
||||
mockClient = mock(RestHighLevelClient.class);
|
||||
opContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization(
|
||||
new IndexConventionImpl("es_browse_dao_test"));
|
||||
new IndexConventionImpl("es_browse_dao_test", "MD5"));
|
||||
browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration);
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring
|
||||
protected abstract ESIndexBuilder getIndexBuilder();
|
||||
|
||||
private final IndexConvention _indexConvention =
|
||||
new IndexConventionImpl("es_system_metadata_service_test");
|
||||
new IndexConventionImpl("es_system_metadata_service_test", "MD5");
|
||||
|
||||
private ElasticSearchSystemMetadataService _client;
|
||||
|
||||
|
@ -126,7 +126,7 @@ public abstract class TimeseriesAspectServiceTestBase extends AbstractTestNGSpri
|
||||
|
||||
opContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization(
|
||||
entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test"));
|
||||
entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test", "MD5"));
|
||||
|
||||
elasticSearchTimeseriesAspectService = buildService();
|
||||
elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet());
|
||||
@ -152,7 +152,7 @@ public abstract class TimeseriesAspectServiceTestBase extends AbstractTestNGSpri
|
||||
|
||||
private void upsertDocument(TestEntityProfile dp, Urn urn) throws JsonProcessingException {
|
||||
Map<String, JsonNode> documents =
|
||||
TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null);
|
||||
TimeseriesAspectTransformer.transform(urn, dp, aspectSpec, null, "MD5");
|
||||
assertEquals(documents.size(), 3);
|
||||
documents.forEach(
|
||||
(key, value) ->
|
||||
|
@ -86,12 +86,12 @@ public class SampleDataFixtureConfiguration {
|
||||
|
||||
@Bean(name = "sampleDataIndexConvention")
|
||||
protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) {
|
||||
return new IndexConventionImpl(prefix);
|
||||
return new IndexConventionImpl(prefix, "MD5");
|
||||
}
|
||||
|
||||
@Bean(name = "longTailIndexConvention")
|
||||
protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) {
|
||||
return new IndexConventionImpl(prefix);
|
||||
return new IndexConventionImpl(prefix, "MD5");
|
||||
}
|
||||
|
||||
@Bean(name = "sampleDataFixtureName")
|
||||
|
@ -71,7 +71,7 @@ public class SearchLineageFixtureConfiguration {
|
||||
|
||||
@Bean(name = "searchLineageIndexConvention")
|
||||
protected IndexConvention indexConvention(@Qualifier("searchLineagePrefix") String prefix) {
|
||||
return new IndexConventionImpl(prefix);
|
||||
return new IndexConventionImpl(prefix, "MD5");
|
||||
}
|
||||
|
||||
@Bean(name = "searchLineageFixtureName")
|
||||
@ -173,7 +173,8 @@ public class SearchLineageFixtureConfiguration {
|
||||
new ESGraphWriteDAO(indexConvention, bulkProcessor, 1),
|
||||
new ESGraphQueryDAO(
|
||||
searchClient, lineageRegistry, indexConvention, getGraphQueryConfiguration()),
|
||||
indexBuilder);
|
||||
indexBuilder,
|
||||
indexConvention.getIdHashAlgo());
|
||||
graphService.reindexAll(Collections.emptySet());
|
||||
return graphService;
|
||||
}
|
||||
|
@ -125,7 +125,8 @@ public class UpdateIndicesHookTest {
|
||||
mockTimeseriesAspectService,
|
||||
mockSystemMetadataService,
|
||||
searchDocumentTransformer,
|
||||
mockEntityIndexBuilders);
|
||||
mockEntityIndexBuilders,
|
||||
"MD5");
|
||||
|
||||
OperationContext systemOperationContext =
|
||||
TestOperationContexts.systemContextNoSearchAuthorization();
|
||||
@ -235,7 +236,8 @@ public class UpdateIndicesHookTest {
|
||||
mockTimeseriesAspectService,
|
||||
mockSystemMetadataService,
|
||||
searchDocumentTransformer,
|
||||
mockEntityIndexBuilders);
|
||||
mockEntityIndexBuilders,
|
||||
"MD5");
|
||||
|
||||
updateIndicesHook = new UpdateIndicesHook(updateIndicesService, true, false);
|
||||
updateIndicesHook.init(
|
||||
|
@ -21,7 +21,7 @@ import lombok.Getter;
|
||||
public class SearchContext implements ContextInterface {
|
||||
|
||||
public static SearchContext EMPTY =
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("")).build();
|
||||
|
||||
public static SearchContext withFlagDefaults(
|
||||
@Nonnull SearchContext searchContext,
|
||||
|
@ -191,7 +191,7 @@ public class TestOperationContexts {
|
||||
IndexConvention indexConvention =
|
||||
Optional.ofNullable(indexConventionSupplier)
|
||||
.map(Supplier::get)
|
||||
.orElse(IndexConventionImpl.NO_PREFIX);
|
||||
.orElse(IndexConventionImpl.noPrefix("MD5"));
|
||||
|
||||
ServicesRegistryContext servicesRegistryContext =
|
||||
Optional.ofNullable(servicesRegistrySupplier).orElse(() -> null).get();
|
||||
|
@ -12,26 +12,26 @@ public class SearchContextTest {
|
||||
@Test
|
||||
public void searchContextId() {
|
||||
SearchContext testNoFlags =
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build();
|
||||
|
||||
assertEquals(
|
||||
testNoFlags.getCacheKeyComponent(),
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
"Expected consistent context ids across instances");
|
||||
|
||||
SearchContext testWithFlags =
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.searchFlags(new SearchFlags().setFulltext(true))
|
||||
.build();
|
||||
|
||||
assertEquals(
|
||||
testWithFlags.getCacheKeyComponent(),
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.searchFlags(new SearchFlags().setFulltext(true))
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
@ -44,7 +44,7 @@ public class SearchContextTest {
|
||||
assertNotEquals(
|
||||
testWithFlags.getCacheKeyComponent(),
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true))
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
@ -53,7 +53,7 @@ public class SearchContextTest {
|
||||
assertNotEquals(
|
||||
testNoFlags.getCacheKeyComponent(),
|
||||
SearchContext.builder()
|
||||
.indexConvention(new IndexConventionImpl("Some Prefix"))
|
||||
.indexConvention(new IndexConventionImpl("Some Prefix", "MD5"))
|
||||
.searchFlags(null)
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
@ -61,7 +61,7 @@ public class SearchContextTest {
|
||||
|
||||
assertNotEquals(
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.searchFlags(
|
||||
new SearchFlags()
|
||||
.setFulltext(false)
|
||||
@ -70,7 +70,7 @@ public class SearchContextTest {
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
SearchContext.builder()
|
||||
.indexConvention(IndexConventionImpl.NO_PREFIX)
|
||||
.indexConvention(IndexConventionImpl.noPrefix("MD5"))
|
||||
.searchFlags(new SearchFlags().setFulltext(true).setIncludeRestricted(true))
|
||||
.build()
|
||||
.getCacheKeyComponent(),
|
||||
@ -80,7 +80,7 @@ public class SearchContextTest {
|
||||
@Test
|
||||
public void testImmutableSearchFlags() {
|
||||
SearchContext initial =
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.NO_PREFIX).build();
|
||||
SearchContext.builder().indexConvention(IndexConventionImpl.noPrefix("MD5")).build();
|
||||
assertEquals(initial.getSearchFlags(), new SearchFlags().setSkipCache(false));
|
||||
|
||||
SearchContext mutated = initial.withFlagDefaults(flags -> flags.setSkipCache(true));
|
||||
|
@ -8,4 +8,5 @@ public class ElasticSearchConfiguration {
|
||||
private BuildIndicesConfiguration buildIndices;
|
||||
public String implementation;
|
||||
private SearchConfiguration search;
|
||||
private String idHashAlgo;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import com.linkedin.metadata.models.registry.LineageRegistry;
|
||||
import javax.annotation.Nonnull;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
@ -30,7 +31,8 @@ public class ElasticSearchGraphServiceFactory {
|
||||
|
||||
@Bean(name = "elasticSearchGraphService")
|
||||
@Nonnull
|
||||
protected ElasticSearchGraphService getInstance() {
|
||||
protected ElasticSearchGraphService getInstance(
|
||||
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
|
||||
LineageRegistry lineageRegistry = new LineageRegistry(entityRegistry);
|
||||
return new ElasticSearchGraphService(
|
||||
lineageRegistry,
|
||||
@ -45,6 +47,7 @@ public class ElasticSearchGraphServiceFactory {
|
||||
lineageRegistry,
|
||||
components.getIndexConvention(),
|
||||
configurationProvider.getElasticSearch().getSearch().getGraph()),
|
||||
components.getIndexBuilder());
|
||||
components.getIndexBuilder(),
|
||||
idHashAlgo);
|
||||
}
|
||||
}
|
||||
|
@ -19,7 +19,8 @@ public class IndexConventionFactory {
|
||||
private String indexPrefix;
|
||||
|
||||
@Bean(name = INDEX_CONVENTION_BEAN)
|
||||
protected IndexConvention createInstance() {
|
||||
return new IndexConventionImpl(indexPrefix);
|
||||
protected IndexConvention createInstance(
|
||||
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
|
||||
return new IndexConventionImpl(indexPrefix, idHashAlgo);
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ import com.linkedin.metadata.search.transformer.SearchDocumentTransformer;
|
||||
import com.linkedin.metadata.service.UpdateIndicesService;
|
||||
import com.linkedin.metadata.systemmetadata.SystemMetadataService;
|
||||
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
@ -30,7 +31,8 @@ public class UpdateIndicesServiceFactory {
|
||||
TimeseriesAspectService timeseriesAspectService,
|
||||
SystemMetadataService systemMetadataService,
|
||||
SearchDocumentTransformer searchDocumentTransformer,
|
||||
EntityIndexBuilders entityIndexBuilders) {
|
||||
EntityIndexBuilders entityIndexBuilders,
|
||||
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
|
||||
|
||||
return new UpdateIndicesService(
|
||||
graphService,
|
||||
@ -38,7 +40,8 @@ public class UpdateIndicesServiceFactory {
|
||||
timeseriesAspectService,
|
||||
systemMetadataService,
|
||||
searchDocumentTransformer,
|
||||
entityIndexBuilders);
|
||||
entityIndexBuilders,
|
||||
idHashAlgo);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ -50,7 +53,8 @@ public class UpdateIndicesServiceFactory {
|
||||
final SystemMetadataService systemMetadataService,
|
||||
final SearchDocumentTransformer searchDocumentTransformer,
|
||||
final EntityIndexBuilders entityIndexBuilders,
|
||||
final EntityService<?> entityService) {
|
||||
final EntityService<?> entityService,
|
||||
@Value("${elasticsearch.idHashAlgo}") final String idHashAlgo) {
|
||||
|
||||
UpdateIndicesService updateIndicesService =
|
||||
new UpdateIndicesService(
|
||||
@ -59,7 +63,8 @@ public class UpdateIndicesServiceFactory {
|
||||
timeseriesAspectService,
|
||||
systemMetadataService,
|
||||
searchDocumentTransformer,
|
||||
entityIndexBuilders);
|
||||
entityIndexBuilders,
|
||||
idHashAlgo);
|
||||
|
||||
entityService.setUpdateIndicesService(updateIndicesService);
|
||||
|
||||
|
@ -2,25 +2,20 @@ package com.linkedin.metadata.resources.usage;
|
||||
|
||||
import static com.datahub.authorization.AuthUtil.isAPIAuthorized;
|
||||
import static com.datahub.authorization.AuthUtil.isAPIAuthorizedEntityUrns;
|
||||
import static com.linkedin.metadata.Constants.*;
|
||||
import static com.linkedin.metadata.authorization.ApiOperation.UPDATE;
|
||||
import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ASPECT_NAME;
|
||||
import static com.linkedin.metadata.timeseries.elastic.UsageServiceUtil.USAGE_STATS_ENTITY_NAME;
|
||||
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.codahale.metrics.Timer;
|
||||
import com.datahub.authentication.Authentication;
|
||||
import com.datahub.authentication.AuthenticationContext;
|
||||
import com.datahub.authorization.EntitySpec;
|
||||
import com.datahub.plugins.auth.authorization.Authorizer;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.StreamReadConstraints;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.linkedin.common.WindowDuration;
|
||||
import com.linkedin.common.urn.Urn;
|
||||
import com.linkedin.common.urn.UrnUtils;
|
||||
import com.linkedin.data.template.StringArray;
|
||||
import com.linkedin.dataset.DatasetFieldUsageCounts;
|
||||
import com.linkedin.dataset.DatasetFieldUsageCountsArray;
|
||||
import com.linkedin.dataset.DatasetUsageStatistics;
|
||||
@ -29,17 +24,10 @@ import com.linkedin.dataset.DatasetUserUsageCountsArray;
|
||||
import com.linkedin.metadata.authorization.PoliciesConfig;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
import com.linkedin.metadata.models.registry.EntityRegistry;
|
||||
import com.linkedin.metadata.query.filter.Condition;
|
||||
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
|
||||
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
|
||||
import com.linkedin.metadata.query.filter.Criterion;
|
||||
import com.linkedin.metadata.query.filter.CriterionArray;
|
||||
import com.linkedin.metadata.query.filter.Filter;
|
||||
import com.linkedin.metadata.restli.RestliUtil;
|
||||
import com.linkedin.metadata.timeseries.TimeseriesAspectService;
|
||||
import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil;
|
||||
import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer;
|
||||
import com.linkedin.metadata.utils.metrics.MetricUtils;
|
||||
import com.linkedin.parseq.Task;
|
||||
import com.linkedin.restli.common.HttpStatus;
|
||||
import com.linkedin.restli.server.RestLiServiceException;
|
||||
@ -47,35 +35,20 @@ import com.linkedin.restli.server.annotations.Action;
|
||||
import com.linkedin.restli.server.annotations.ActionParam;
|
||||
import com.linkedin.restli.server.annotations.RestLiSimpleResource;
|
||||
import com.linkedin.restli.server.resources.SimpleResourceTemplate;
|
||||
import com.linkedin.timeseries.AggregationSpec;
|
||||
import com.linkedin.timeseries.AggregationType;
|
||||
import com.linkedin.timeseries.CalendarInterval;
|
||||
import com.linkedin.timeseries.GenericTable;
|
||||
import com.linkedin.timeseries.GroupingBucket;
|
||||
import com.linkedin.timeseries.GroupingBucketType;
|
||||
import com.linkedin.timeseries.TimeWindowSize;
|
||||
import com.linkedin.usage.FieldUsageCounts;
|
||||
import com.linkedin.usage.FieldUsageCountsArray;
|
||||
import com.linkedin.usage.UsageAggregation;
|
||||
import com.linkedin.usage.UsageAggregationArray;
|
||||
import com.linkedin.usage.UsageAggregationMetrics;
|
||||
import com.linkedin.usage.UsageQueryResult;
|
||||
import com.linkedin.usage.UsageQueryResultAggregations;
|
||||
import com.linkedin.usage.UsageTimeRange;
|
||||
import com.linkedin.usage.UserUsageCounts;
|
||||
import com.linkedin.usage.UserUsageCountsArray;
|
||||
import io.datahubproject.metadata.context.OperationContext;
|
||||
import io.datahubproject.metadata.context.RequestContext;
|
||||
import io.opentelemetry.extension.annotations.WithSpan;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Named;
|
||||
@ -255,7 +228,8 @@ public class UsageStats extends SimpleResourceTemplate<UsageAggregation> {
|
||||
try {
|
||||
documents =
|
||||
TimeseriesAspectTransformer.transform(
|
||||
bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null);
|
||||
bucket.getResource(), datasetUsageStatistics, getUsageStatsAspectSpec(), null,
|
||||
systemOperationContext.getSearchContext().getIndexConvention().getIdHashAlgo());
|
||||
} catch (JsonProcessingException e) {
|
||||
log.error("Failed to generate timeseries document from aspect: {}", e.toString());
|
||||
return;
|
||||
|
@ -47,4 +47,7 @@ public interface IndexConvention {
|
||||
* if one cannot be extracted
|
||||
*/
|
||||
Optional<Pair<String, String>> getEntityAndAspectName(String timeseriesAspectIndexName);
|
||||
|
||||
@Nonnull
|
||||
String getIdHashAlgo();
|
||||
}
|
||||
|
@ -8,25 +8,30 @@ import java.util.Optional;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import lombok.Getter;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
// Default implementation of search index naming convention
|
||||
public class IndexConventionImpl implements IndexConvention {
|
||||
public static final IndexConvention NO_PREFIX = new IndexConventionImpl(null);
|
||||
public static IndexConvention noPrefix(@Nonnull String idHashAlgo) {
|
||||
return new IndexConventionImpl(null, idHashAlgo);
|
||||
}
|
||||
|
||||
// Map from Entity name -> Index name
|
||||
private final Map<String, String> indexNameMapping = new ConcurrentHashMap<>();
|
||||
private final Optional<String> _prefix;
|
||||
private final String _getAllEntityIndicesPattern;
|
||||
private final String _getAllTimeseriesIndicesPattern;
|
||||
@Getter private final String idHashAlgo;
|
||||
|
||||
private static final String ENTITY_INDEX_VERSION = "v2";
|
||||
private static final String ENTITY_INDEX_SUFFIX = "index";
|
||||
private static final String TIMESERIES_INDEX_VERSION = "v1";
|
||||
private static final String TIMESERIES_ENTITY_INDEX_SUFFIX = "aspect";
|
||||
|
||||
public IndexConventionImpl(@Nullable String prefix) {
|
||||
public IndexConventionImpl(@Nullable String prefix, String idHashAlgo) {
|
||||
_prefix = StringUtils.isEmpty(prefix) ? Optional.empty() : Optional.of(prefix);
|
||||
this.idHashAlgo = idHashAlgo;
|
||||
_getAllEntityIndicesPattern =
|
||||
_prefix.map(p -> p + "_").orElse("")
|
||||
+ "*"
|
||||
|
@ -10,7 +10,7 @@ public class IndexConventionImplTest {
|
||||
|
||||
@Test
|
||||
public void testIndexConventionNoPrefix() {
|
||||
IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX;
|
||||
IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5");
|
||||
String entityName = "dataset";
|
||||
String expectedIndexName = "datasetindex_v2";
|
||||
assertEquals(indexConventionNoPrefix.getEntityIndexName(entityName), expectedIndexName);
|
||||
@ -25,7 +25,7 @@ public class IndexConventionImplTest {
|
||||
|
||||
@Test
|
||||
public void testIndexConventionPrefix() {
|
||||
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix");
|
||||
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5");
|
||||
String entityName = "dataset";
|
||||
String expectedIndexName = "prefix_datasetindex_v2";
|
||||
assertEquals(indexConventionPrefix.getEntityIndexName(entityName), expectedIndexName);
|
||||
@ -42,7 +42,7 @@ public class IndexConventionImplTest {
|
||||
|
||||
@Test
|
||||
public void testTimeseriesIndexConventionNoPrefix() {
|
||||
IndexConvention indexConventionNoPrefix = IndexConventionImpl.NO_PREFIX;
|
||||
IndexConvention indexConventionNoPrefix = IndexConventionImpl.noPrefix("MD5");
|
||||
String entityName = "dataset";
|
||||
String aspectName = "datasetusagestatistics";
|
||||
String expectedIndexName = "dataset_datasetusagestatisticsaspect_v1";
|
||||
@ -64,7 +64,7 @@ public class IndexConventionImplTest {
|
||||
|
||||
@Test
|
||||
public void testTimeseriesIndexConventionPrefix() {
|
||||
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix");
|
||||
IndexConvention indexConventionPrefix = new IndexConventionImpl("prefix", "MD5");
|
||||
String entityName = "dataset";
|
||||
String aspectName = "datasetusagestatistics";
|
||||
String expectedIndexName = "prefix_dataset_datasetusagestatisticsaspect_v1";
|
||||
|
@ -16,8 +16,6 @@ DATAHUB_SEARCH_TAG="${DATAHUB_SEARCH_TAG:=2.9.0}"
|
||||
XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true}"
|
||||
ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}"
|
||||
USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}"
|
||||
ELASTIC_ID_HASH_ALGO="${ELASTIC_ID_HASH_ALGO:=MD5}"
|
||||
|
||||
|
||||
DATAHUB_TELEMETRY_ENABLED=false \
|
||||
DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \
|
||||
|
@ -1,3 +1,2 @@
|
||||
export DATAHUB_KAFKA_SCHEMA_REGISTRY_URL=http://localhost:8080/schema-registry/api
|
||||
export DATAHUB_GMS_URL=http://localhost:8080
|
||||
export ELASTIC_ID_HASH_ALGO="MD5"
|
||||
export DATAHUB_GMS_URL=http://localhost:8080
|
Loading…
x
Reference in New Issue
Block a user