mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-30 20:06:19 +00:00
MINOR: Implement Multi Data Streams. Fix Duplicating issue (#17365)
* Implement Multi Data Streams. Fix Duplicating issue * Fix Test for multi index search --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
This commit is contained in:
parent
c84b77859e
commit
7b191b891d
@ -9,6 +9,7 @@ import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang.exception.ExceptionUtils;
|
||||
@ -42,6 +43,7 @@ import org.quartz.JobExecutionContext;
|
||||
@Slf4j
|
||||
public class DataInsightsApp extends AbstractNativeApplication {
|
||||
public static final String REPORT_DATA_TYPE_KEY = "ReportDataType";
|
||||
public static final String DATA_ASSET_INDEX_PREFIX = "di-data-assets";
|
||||
@Getter private Long timestamp;
|
||||
@Getter private int batchSize;
|
||||
|
||||
@ -53,6 +55,24 @@ public class DataInsightsApp extends AbstractNativeApplication {
|
||||
@Getter EventPublisherJob jobData;
|
||||
private volatile boolean stopped = false;
|
||||
|
||||
public final Set<String> dataAssetTypes =
|
||||
Set.of(
|
||||
"table",
|
||||
"storedProcedure",
|
||||
"databaseSchema",
|
||||
"database",
|
||||
"chart",
|
||||
"dashboard",
|
||||
"dashboardDataModel",
|
||||
"pipeline",
|
||||
"topic",
|
||||
"container",
|
||||
"searchIndex",
|
||||
"mlmodel",
|
||||
"dataProduct",
|
||||
"glossaryTerm",
|
||||
"tag");
|
||||
|
||||
public DataInsightsApp(CollectionDAO collectionDAO, SearchRepository searchRepository) {
|
||||
super(collectionDAO, searchRepository);
|
||||
}
|
||||
@ -75,12 +95,19 @@ public class DataInsightsApp extends AbstractNativeApplication {
|
||||
return searchInterface;
|
||||
}
|
||||
|
||||
public static String getDataStreamName(String dataAssetType) {
|
||||
return String.format("%s-%s", DATA_ASSET_INDEX_PREFIX, dataAssetType).toLowerCase();
|
||||
}
|
||||
|
||||
private void createDataAssetsDataStream() {
|
||||
DataInsightsSearchInterface searchInterface = getSearchInterface();
|
||||
|
||||
try {
|
||||
if (!searchInterface.dataAssetDataStreamExists("di-data-assets")) {
|
||||
searchInterface.createDataAssetsDataStream();
|
||||
for (String dataAssetType : dataAssetTypes) {
|
||||
String dataStreamName = getDataStreamName(dataAssetType);
|
||||
if (!searchInterface.dataAssetDataStreamExists(dataStreamName)) {
|
||||
searchInterface.createDataAssetsDataStream(dataStreamName);
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
LOG.error("Couldn't install DataInsightsApp: Can't initialize ElasticSearch Index.", ex);
|
||||
@ -91,8 +118,11 @@ public class DataInsightsApp extends AbstractNativeApplication {
|
||||
DataInsightsSearchInterface searchInterface = getSearchInterface();
|
||||
|
||||
try {
|
||||
if (searchInterface.dataAssetDataStreamExists("di-data-assets")) {
|
||||
searchInterface.deleteDataAssetDataStream();
|
||||
for (String dataAssetType : dataAssetTypes) {
|
||||
String dataStreamName = getDataStreamName(dataAssetType);
|
||||
if (searchInterface.dataAssetDataStreamExists(dataStreamName)) {
|
||||
searchInterface.deleteDataAssetDataStream(dataStreamName);
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
LOG.error("Couldn't delete DataAssets DataStream", ex);
|
||||
@ -231,7 +261,8 @@ public class DataInsightsApp extends AbstractNativeApplication {
|
||||
|
||||
private WorkflowStats processDataAssets() {
|
||||
DataAssetsWorkflow workflow =
|
||||
new DataAssetsWorkflow(timestamp, batchSize, backfill, collectionDAO, searchRepository);
|
||||
new DataAssetsWorkflow(
|
||||
timestamp, batchSize, backfill, dataAssetTypes, collectionDAO, searchRepository);
|
||||
WorkflowStats workflowStats = workflow.getWorkflowStats();
|
||||
|
||||
try {
|
||||
|
@ -23,9 +23,9 @@ public interface DataInsightsSearchInterface {
|
||||
}
|
||||
}
|
||||
|
||||
void createDataAssetsDataStream() throws IOException;
|
||||
void createDataAssetsDataStream(String name) throws IOException;
|
||||
|
||||
void deleteDataAssetDataStream() throws IOException;
|
||||
void deleteDataAssetDataStream(String name) throws IOException;
|
||||
|
||||
Boolean dataAssetDataStreamExists(String name) throws IOException;
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ public class ElasticSearchDataInsightsClient implements DataInsightsSearchInterf
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createDataAssetsDataStream() throws IOException {
|
||||
public void createDataAssetsDataStream(String name) throws IOException {
|
||||
String resourcePath = "/dataInsights/elasticsearch";
|
||||
createLifecyclePolicy(
|
||||
"di-data-assets-lifecycle",
|
||||
@ -65,11 +65,11 @@ public class ElasticSearchDataInsightsClient implements DataInsightsSearchInterf
|
||||
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
|
||||
createIndexTemplate(
|
||||
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
|
||||
createDataStream("di-data-assets");
|
||||
createDataStream(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteDataAssetDataStream() throws IOException {
|
||||
performRequest("DELETE", "/_data_stream/di-data-assets");
|
||||
public void deleteDataAssetDataStream(String name) throws IOException {
|
||||
performRequest("DELETE", String.format("/_data_stream/%s", name));
|
||||
}
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ public class OpenSearchDataInsightsClient implements DataInsightsSearchInterface
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createDataAssetsDataStream() throws IOException {
|
||||
public void createDataAssetsDataStream(String name) throws IOException {
|
||||
String resourcePath = "/dataInsights/opensearch";
|
||||
createLifecyclePolicy(
|
||||
"di-data-assets-lifecycle",
|
||||
@ -73,11 +73,11 @@ public class OpenSearchDataInsightsClient implements DataInsightsSearchInterface
|
||||
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
|
||||
createIndexTemplate(
|
||||
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
|
||||
createDataStream("di-data-assets");
|
||||
createDataStream(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteDataAssetDataStream() throws IOException {
|
||||
performRequest("DELETE", "_data_stream/di-data-assets");
|
||||
public void deleteDataAssetDataStream(String name) throws IOException {
|
||||
performRequest("DELETE", String.format("_data_stream/%s", name));
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.openmetadata.service.apps.bundles.insights.workflows.dataAssets;
|
||||
|
||||
import static org.openmetadata.schema.system.IndexingError.ErrorSource.READER;
|
||||
import static org.openmetadata.service.apps.bundles.insights.DataInsightsApp.getDataStreamName;
|
||||
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.END_TIMESTAMP_KEY;
|
||||
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.START_TIMESTAMP_KEY;
|
||||
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.ENTITY_TYPE_KEY;
|
||||
@ -38,7 +39,7 @@ import org.openmetadata.service.workflows.searchIndex.PaginatedEntitiesSource;
|
||||
|
||||
@Slf4j
|
||||
public class DataAssetsWorkflow {
|
||||
public static final String DATA_ASSETS_DATA_STREAM = "di-data-assets";
|
||||
public static final String DATA_STREAM_KEY = "DataStreamKey";
|
||||
private final int retentionDays = 30;
|
||||
private final Long startTimestamp;
|
||||
private final Long endTimestamp;
|
||||
@ -46,23 +47,7 @@ public class DataAssetsWorkflow {
|
||||
private final SearchRepository searchRepository;
|
||||
private final CollectionDAO collectionDAO;
|
||||
private final List<PaginatedEntitiesSource> sources = new ArrayList<>();
|
||||
private final Set<String> entityTypes =
|
||||
Set.of(
|
||||
"table",
|
||||
"storedProcedure",
|
||||
"databaseSchema",
|
||||
"database",
|
||||
"chart",
|
||||
"dashboard",
|
||||
"dashboardDataModel",
|
||||
"pipeline",
|
||||
"topic",
|
||||
"container",
|
||||
"searchIndex",
|
||||
"mlmodel",
|
||||
"dataProduct",
|
||||
"glossaryTerm",
|
||||
"tag");
|
||||
private final Set<String> entityTypes;
|
||||
|
||||
private DataInsightsEntityEnricherProcessor entityEnricher;
|
||||
private Processor entityProcessor;
|
||||
@ -73,6 +58,7 @@ public class DataAssetsWorkflow {
|
||||
Long timestamp,
|
||||
int batchSize,
|
||||
Optional<DataInsightsApp.Backfill> backfill,
|
||||
Set<String> entityTypes,
|
||||
CollectionDAO collectionDAO,
|
||||
SearchRepository searchRepository) {
|
||||
if (backfill.isPresent()) {
|
||||
@ -105,6 +91,7 @@ public class DataAssetsWorkflow {
|
||||
this.batchSize = batchSize;
|
||||
this.searchRepository = searchRepository;
|
||||
this.collectionDAO = collectionDAO;
|
||||
this.entityTypes = entityTypes;
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
@ -144,9 +131,9 @@ public class DataAssetsWorkflow {
|
||||
contextData.put(START_TIMESTAMP_KEY, startTimestamp);
|
||||
contextData.put(END_TIMESTAMP_KEY, endTimestamp);
|
||||
|
||||
deleteDataBeforeInserting();
|
||||
|
||||
for (PaginatedEntitiesSource source : sources) {
|
||||
deleteDataBeforeInserting(getDataStreamName(source.getEntityType()));
|
||||
contextData.put(DATA_STREAM_KEY, getDataStreamName(source.getEntityType()));
|
||||
contextData.put(ENTITY_TYPE_KEY, source.getEntityType());
|
||||
|
||||
while (!source.isDone()) {
|
||||
@ -191,12 +178,12 @@ public class DataAssetsWorkflow {
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteDataBeforeInserting() throws SearchIndexException {
|
||||
private void deleteDataBeforeInserting(String dataStreamName) throws SearchIndexException {
|
||||
try {
|
||||
searchRepository
|
||||
.getSearchClient()
|
||||
.deleteByQuery(
|
||||
DATA_ASSETS_DATA_STREAM,
|
||||
dataStreamName,
|
||||
String.format(
|
||||
"{\"@timestamp\": {\"gte\": %s, \"lte\": %s}}", startTimestamp, endTimestamp));
|
||||
} catch (Exception rx) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.processors;
|
||||
|
||||
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.DATA_ASSETS_DATA_STREAM;
|
||||
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.DATA_STREAM_KEY;
|
||||
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getUpdatedStats;
|
||||
|
||||
import es.org.elasticsearch.action.bulk.BulkRequest;
|
||||
@ -29,7 +29,7 @@ public class DataInsightsElasticSearchProcessor
|
||||
@Override
|
||||
public BulkRequest process(List<Map<String, Object>> input, Map<String, Object> contextData)
|
||||
throws SearchIndexException {
|
||||
String index = DATA_ASSETS_DATA_STREAM;
|
||||
String index = (String) contextData.get(DATA_STREAM_KEY);
|
||||
LOG.debug(
|
||||
"[EsEntitiesProcessor] Processing a Batch of Size: {}, Index: {} ", input.size(), index);
|
||||
BulkRequest requests;
|
||||
|
@ -92,7 +92,7 @@ public class DataInsightsEntityEnricherProcessor
|
||||
EntityInterface versionEntity =
|
||||
JsonUtils.readOrConvertValue(
|
||||
version, ENTITY_TYPE_TO_CLASS_MAP.get(entityType.toLowerCase()));
|
||||
Long versionTimestamp = TimestampUtils.getEndOfDayTimestamp(versionEntity.getUpdatedAt());
|
||||
Long versionTimestamp = TimestampUtils.getStartOfDayTimestamp(versionEntity.getUpdatedAt());
|
||||
if (versionTimestamp > pointerTimestamp) {
|
||||
continue;
|
||||
} else if (versionTimestamp < startTimestamp) {
|
||||
@ -108,15 +108,12 @@ public class DataInsightsEntityEnricherProcessor
|
||||
Map<String, Object> versionMap = new HashMap<>();
|
||||
|
||||
versionMap.put("endTimestamp", pointerTimestamp);
|
||||
versionMap.put("startTimestamp", versionTimestamp);
|
||||
versionMap.put("startTimestamp", TimestampUtils.getEndOfDayTimestamp(versionTimestamp));
|
||||
versionMap.put("versionEntity", versionEntity);
|
||||
|
||||
entityVersions.add(versionMap);
|
||||
if (versionTimestamp.equals(pointerTimestamp)) {
|
||||
pointerTimestamp = TimestampUtils.subtractDays(pointerTimestamp, 1);
|
||||
} else {
|
||||
pointerTimestamp = versionTimestamp;
|
||||
}
|
||||
pointerTimestamp =
|
||||
TimestampUtils.getEndOfDayTimestamp(TimestampUtils.subtractDays(versionTimestamp, 1));
|
||||
}
|
||||
}
|
||||
return entityVersions;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.processors;
|
||||
|
||||
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.DATA_ASSETS_DATA_STREAM;
|
||||
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.DATA_STREAM_KEY;
|
||||
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getUpdatedStats;
|
||||
|
||||
import java.util.List;
|
||||
@ -29,7 +29,7 @@ public class DataInsightsOpenSearchProcessor
|
||||
@Override
|
||||
public BulkRequest process(List<Map<String, Object>> input, Map<String, Object> contextData)
|
||||
throws SearchIndexException {
|
||||
String index = DATA_ASSETS_DATA_STREAM;
|
||||
String index = (String) contextData.get(DATA_STREAM_KEY);
|
||||
LOG.debug(
|
||||
"[OsEntitiesProcessor] Processing a Batch of Size: {}, Index: {} ", input.size(), index);
|
||||
BulkRequest requests;
|
||||
|
@ -18,7 +18,7 @@ public class DataInsightSystemChartRepository extends EntityRepository<DataInsig
|
||||
private static final SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
|
||||
public static final String TIMESTAMP_FIELD = "@timestamp";
|
||||
|
||||
public static final String DI_SEARCH_INDEX = "di-data-assets";
|
||||
public static final String DI_SEARCH_INDEX = "di-data-assets-*";
|
||||
|
||||
public static final String FORMULA_FUNC_REGEX =
|
||||
"\\b(count|sum|min|max|avg)+\\(k='([^']*)',?\\s*(q='([^']*)')?\\)?";
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"index_patterns": [
|
||||
"di-data-assets"
|
||||
"di-data-assets-*"
|
||||
],
|
||||
"data_stream": {},
|
||||
"composed_of": [
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"index_patterns": [
|
||||
"di-data-assets"
|
||||
"di-data-assets-*"
|
||||
],
|
||||
"data_stream": {},
|
||||
"composed_of": [
|
||||
|
@ -299,7 +299,7 @@ public class AppsResourceTest extends EntityResourceTest<App, CreateApp> {
|
||||
// -------------------------------------------------
|
||||
RestClient searchClient = getSearchClient();
|
||||
es.org.elasticsearch.client.Response response;
|
||||
Request request = new Request("GET", "di-data-assets/_search");
|
||||
Request request = new Request("GET", "di-data-assets-*/_search");
|
||||
String payload =
|
||||
String.format(
|
||||
"{\"query\":{\"bool\":{\"must\":{\"term\":{\"fullyQualifiedName.keyword\":\"%s\"}}}}}",
|
||||
|
@ -16,6 +16,7 @@ import es.org.elasticsearch.client.RestClient;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.http.client.HttpResponseException;
|
||||
@ -47,6 +48,23 @@ public class KpiResourceTest extends EntityResourceTest<Kpi, CreateKpiRequest> {
|
||||
|
||||
private void createDataAssetsDataStream() {
|
||||
DataInsightsSearchInterface searchInterface;
|
||||
Set<String> dataAssetTypes =
|
||||
Set.of(
|
||||
"table",
|
||||
"storedProcedure",
|
||||
"databaseSchema",
|
||||
"database",
|
||||
"chart",
|
||||
"dashboard",
|
||||
"dashboardDataModel",
|
||||
"pipeline",
|
||||
"topic",
|
||||
"container",
|
||||
"searchIndex",
|
||||
"mlmodel",
|
||||
"dataProduct",
|
||||
"glossaryTerm",
|
||||
"tag");
|
||||
if (getSearchRepository()
|
||||
.getSearchType()
|
||||
.equals(ElasticSearchConfiguration.SearchType.ELASTICSEARCH)) {
|
||||
@ -59,17 +77,21 @@ public class KpiResourceTest extends EntityResourceTest<Kpi, CreateKpiRequest> {
|
||||
(os.org.opensearch.client.RestClient)
|
||||
getSearchRepository().getSearchClient().getLowLevelClient());
|
||||
}
|
||||
|
||||
try {
|
||||
if (!searchInterface.dataAssetDataStreamExists("di-data-assets")) {
|
||||
searchInterface.createDataAssetsDataStream();
|
||||
for (String dataAssetType : dataAssetTypes) {
|
||||
String dataStreamName =
|
||||
String.format("%s-%s", "di-data-assets", dataAssetType).toLowerCase();
|
||||
if (!searchInterface.dataAssetDataStreamExists(dataStreamName)) {
|
||||
searchInterface.createDataAssetsDataStream(dataStreamName);
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
LOG.error("Couldn't install DataInsightsApp: Can't initialize ElasticSearch Index.");
|
||||
LOG.error("Couldn't install DataInsightsApp: Can't initialize ElasticSearch Index.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
public void setupKpi() throws IOException {
|
||||
|
||||
createDataAssetsDataStream();
|
||||
KPI_TARGET = new KpiTarget().withName("Percentage").withValue("80.0");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user