mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-19 22:49:00 +00:00
* Added application run logs for DataRetentionApp * Code changes to accommodate change in EntityStats
This commit is contained in:
parent
0e69bbd4dc
commit
06953ee266
@ -1,24 +1,41 @@
|
|||||||
package org.openmetadata.service.apps.bundles.dataRetention;
|
package org.openmetadata.service.apps.bundles.dataRetention;
|
||||||
|
|
||||||
|
import static org.openmetadata.service.apps.scheduler.OmAppJobListener.APP_RUN_STATS;
|
||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.lang.exception.ExceptionUtils;
|
||||||
import org.jdbi.v3.sqlobject.transaction.Transaction;
|
import org.jdbi.v3.sqlobject.transaction.Transaction;
|
||||||
import org.openmetadata.common.utils.CommonUtil;
|
import org.openmetadata.common.utils.CommonUtil;
|
||||||
import org.openmetadata.schema.entity.app.App;
|
import org.openmetadata.schema.entity.app.App;
|
||||||
|
import org.openmetadata.schema.entity.app.AppRunRecord;
|
||||||
|
import org.openmetadata.schema.entity.app.FailureContext;
|
||||||
import org.openmetadata.schema.entity.applications.configuration.internal.DataRetentionConfiguration;
|
import org.openmetadata.schema.entity.applications.configuration.internal.DataRetentionConfiguration;
|
||||||
|
import org.openmetadata.schema.system.EntityStats;
|
||||||
|
import org.openmetadata.schema.system.Stats;
|
||||||
|
import org.openmetadata.schema.system.StepStats;
|
||||||
import org.openmetadata.service.apps.AbstractNativeApplication;
|
import org.openmetadata.service.apps.AbstractNativeApplication;
|
||||||
import org.openmetadata.service.jdbi3.CollectionDAO;
|
import org.openmetadata.service.jdbi3.CollectionDAO;
|
||||||
import org.openmetadata.service.search.SearchRepository;
|
import org.openmetadata.service.search.SearchRepository;
|
||||||
|
import org.openmetadata.service.socket.WebSocketManager;
|
||||||
import org.openmetadata.service.util.JsonUtils;
|
import org.openmetadata.service.util.JsonUtils;
|
||||||
import org.quartz.JobExecutionContext;
|
import org.quartz.JobExecutionContext;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class DataRetention extends AbstractNativeApplication {
|
public class DataRetention extends AbstractNativeApplication {
|
||||||
private static final int BATCH_SIZE = 10_000;
|
private static final int BATCH_SIZE = 10_000;
|
||||||
|
|
||||||
private DataRetentionConfiguration dataRetentionConfiguration;
|
private DataRetentionConfiguration dataRetentionConfiguration;
|
||||||
private final CollectionDAO.EventSubscriptionDAO eventSubscriptionDAO;
|
private final CollectionDAO.EventSubscriptionDAO eventSubscriptionDAO;
|
||||||
|
private final Stats retentionStats = new Stats();
|
||||||
|
private JobExecutionContext jobExecutionContext;
|
||||||
|
|
||||||
|
private AppRunRecord.Status internalStatus = AppRunRecord.Status.COMPLETED;
|
||||||
|
private Map<String, Object> failureDetails = null;
|
||||||
|
|
||||||
public DataRetention(CollectionDAO collectionDAO, SearchRepository searchRepository) {
|
public DataRetention(CollectionDAO collectionDAO, SearchRepository searchRepository) {
|
||||||
super(collectionDAO, searchRepository);
|
super(collectionDAO, searchRepository);
|
||||||
@ -37,44 +54,100 @@ public class DataRetention extends AbstractNativeApplication {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startApp(JobExecutionContext jobExecutionContext) {
|
public void startApp(JobExecutionContext jobExecutionContext) {
|
||||||
|
this.jobExecutionContext = jobExecutionContext;
|
||||||
|
|
||||||
|
try {
|
||||||
|
initializeStatsDefaults();
|
||||||
executeCleanup(dataRetentionConfiguration);
|
executeCleanup(dataRetentionConfiguration);
|
||||||
|
|
||||||
|
jobExecutionContext.getJobDetail().getJobDataMap().put(APP_RUN_STATS, retentionStats);
|
||||||
|
updateRecordToDbAndNotify(null);
|
||||||
|
|
||||||
|
if (internalStatus == AppRunRecord.Status.ACTIVE_ERROR
|
||||||
|
|| internalStatus == AppRunRecord.Status.FAILED) {
|
||||||
|
throw new RuntimeException("Partial failure occurred in DataRetention job");
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception ex) {
|
||||||
|
LOG.error("DataRetention job failed.", ex);
|
||||||
|
internalStatus = AppRunRecord.Status.FAILED;
|
||||||
|
|
||||||
|
failureDetails = new HashMap<>();
|
||||||
|
failureDetails.put("message", ex.getMessage());
|
||||||
|
failureDetails.put("jobStackTrace", ExceptionUtils.getStackTrace(ex));
|
||||||
|
|
||||||
|
updateRecordToDbAndNotify(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeStatsDefaults() {
|
||||||
|
StepStats jobStats =
|
||||||
|
new StepStats().withTotalRecords(0).withSuccessRecords(0).withFailedRecords(0);
|
||||||
|
retentionStats.setJobStats(jobStats);
|
||||||
|
|
||||||
|
EntityStats entityStats = new EntityStats();
|
||||||
|
entityStats.withAdditionalProperty("successful_sent_change_events", new StepStats());
|
||||||
|
entityStats.withAdditionalProperty("change_events", new StepStats());
|
||||||
|
entityStats.withAdditionalProperty("consumers_dlq", new StepStats());
|
||||||
|
retentionStats.setEntityStats(entityStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void executeCleanup(DataRetentionConfiguration config) {
|
public void executeCleanup(DataRetentionConfiguration config) {
|
||||||
if (CommonUtil.nullOrEmpty(config)) {
|
if (config == null) {
|
||||||
|
LOG.warn("DataRetentionConfiguration is null. Skipping cleanup.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
int retentionPeriod = config.getChangeEventRetentionPeriod();
|
||||||
cleanChangeEvents(config.getChangeEventRetentionPeriod());
|
LOG.info("Starting cleanup for change events with retention period: {} days.", retentionPeriod);
|
||||||
|
cleanChangeEvents(retentionPeriod);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Transaction
|
@Transaction
|
||||||
private void cleanChangeEvents(int retentionPeriod) {
|
private void cleanChangeEvents(int retentionPeriod) {
|
||||||
LOG.info(
|
LOG.info("Initiating change events cleanup: Retention = {} days.", retentionPeriod);
|
||||||
"Initiating change events cleanup: Deleting records with a retention period of {} days.",
|
|
||||||
retentionPeriod);
|
|
||||||
long cutoffMillis = getRetentionCutoffMillis(retentionPeriod);
|
long cutoffMillis = getRetentionCutoffMillis(retentionPeriod);
|
||||||
|
|
||||||
int totalDeletedSuccessfulEvents =
|
executeWithStatsTracking(
|
||||||
batchDelete(
|
"successful_sent_change_events",
|
||||||
() ->
|
() ->
|
||||||
eventSubscriptionDAO.deleteSuccessfulSentChangeEventsInBatches(
|
eventSubscriptionDAO.deleteSuccessfulSentChangeEventsInBatches(
|
||||||
cutoffMillis, BATCH_SIZE));
|
cutoffMillis, BATCH_SIZE));
|
||||||
|
|
||||||
int totalDeletedChangeEvents =
|
executeWithStatsTracking(
|
||||||
batchDelete(
|
"change_events",
|
||||||
() -> eventSubscriptionDAO.deleteChangeEventsInBatches(cutoffMillis, BATCH_SIZE));
|
() -> eventSubscriptionDAO.deleteChangeEventsInBatches(cutoffMillis, BATCH_SIZE));
|
||||||
|
|
||||||
int totalDeletedDlq =
|
executeWithStatsTracking(
|
||||||
batchDelete(
|
"consumers_dlq",
|
||||||
() -> eventSubscriptionDAO.deleteConsumersDlqInBatches(cutoffMillis, BATCH_SIZE));
|
() -> eventSubscriptionDAO.deleteConsumersDlqInBatches(cutoffMillis, BATCH_SIZE));
|
||||||
|
|
||||||
LOG.info(
|
LOG.info("Change events cleanup complete.");
|
||||||
"Change events cleanup completed: {} successful_sent_change_events, {} change_events, and {} consumers_dlq records deleted (retention period: {} days).",
|
}
|
||||||
totalDeletedSuccessfulEvents,
|
|
||||||
totalDeletedChangeEvents,
|
private void executeWithStatsTracking(String entity, Supplier<Integer> deleteFunction) {
|
||||||
totalDeletedDlq,
|
int totalDeleted = 0;
|
||||||
retentionPeriod);
|
int totalFailed = 0;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
int deleted = deleteFunction.get();
|
||||||
|
totalDeleted += deleted;
|
||||||
|
if (deleted < BATCH_SIZE) break;
|
||||||
|
} catch (Exception ex) {
|
||||||
|
LOG.error("Failed to clean entity: {}", entity, ex);
|
||||||
|
totalFailed += BATCH_SIZE;
|
||||||
|
internalStatus = AppRunRecord.Status.ACTIVE_ERROR;
|
||||||
|
|
||||||
|
if (failureDetails == null) {
|
||||||
|
failureDetails = new HashMap<>();
|
||||||
|
failureDetails.put("message", ex.getMessage());
|
||||||
|
failureDetails.put("jobStackTrace", ExceptionUtils.getStackTrace(ex));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
updateStats(entity, totalDeleted, totalFailed);
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getRetentionCutoffMillis(int retentionPeriodInDays) {
|
private long getRetentionCutoffMillis(int retentionPeriodInDays) {
|
||||||
@ -83,18 +156,41 @@ public class DataRetention extends AbstractNativeApplication {
|
|||||||
.toEpochMilli();
|
.toEpochMilli();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private synchronized void updateStats(String entity, int successCount, int failureCount) {
|
||||||
* Runs a batch delete operation in a loop until fewer than BATCH_SIZE records are deleted in a single iteration.
|
StepStats entityStat =
|
||||||
*/
|
retentionStats
|
||||||
private int batchDelete(Supplier<Integer> deleteFunction) {
|
.getEntityStats()
|
||||||
var totalDeleted = 0;
|
.getAdditionalProperties()
|
||||||
while (true) {
|
.getOrDefault(entity, new StepStats());
|
||||||
var deletedCount = deleteFunction.get();
|
|
||||||
totalDeleted += deletedCount;
|
entityStat.setTotalRecords(entityStat.getTotalRecords() + successCount + failureCount);
|
||||||
if (deletedCount < BATCH_SIZE) {
|
entityStat.setSuccessRecords(entityStat.getSuccessRecords() + successCount);
|
||||||
break;
|
entityStat.setFailedRecords(entityStat.getFailedRecords() + failureCount);
|
||||||
|
|
||||||
|
retentionStats.getEntityStats().withAdditionalProperty(entity, entityStat);
|
||||||
|
|
||||||
|
StepStats jobStats = retentionStats.getJobStats();
|
||||||
|
jobStats.setTotalRecords(jobStats.getTotalRecords() + successCount + failureCount);
|
||||||
|
jobStats.setSuccessRecords(jobStats.getSuccessRecords() + successCount);
|
||||||
|
jobStats.setFailedRecords(jobStats.getFailedRecords() + failureCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void updateRecordToDbAndNotify(Exception error) {
|
||||||
|
AppRunRecord appRecord = getJobRecord(jobExecutionContext);
|
||||||
|
appRecord.setStatus(internalStatus);
|
||||||
|
|
||||||
|
if (failureDetails != null) {
|
||||||
|
appRecord.setFailureContext(
|
||||||
|
new FailureContext().withAdditionalProperty("failure", failureDetails));
|
||||||
}
|
}
|
||||||
return totalDeleted;
|
|
||||||
|
if (WebSocketManager.getInstance() != null) {
|
||||||
|
WebSocketManager.getInstance()
|
||||||
|
.broadCastMessageToAll("data_retention_app_channel", JsonUtils.pojoToJson(appRecord));
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("AppRecord before DB save: {}", JsonUtils.pojoToJson(appRecord));
|
||||||
|
pushAppStatusUpdates(jobExecutionContext, appRecord, true);
|
||||||
|
LOG.info("Final AppRunRecord update: {}", JsonUtils.pojoToJson(appRecord));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -523,6 +523,87 @@ public class AppsResourceTest extends EntityResourceTest<App, CreateApp> {
|
|||||||
String.format("eventsubscription instance for %s not found", subscriptionName));
|
String.format("eventsubscription instance for %s not found", subscriptionName));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void test_data_retention_app_deletes_old_change_events()
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
// Create database service, database, and schema
|
||||||
|
DatabaseServiceResourceTest databaseServiceResourceTest = new DatabaseServiceResourceTest();
|
||||||
|
DatabaseService databaseService =
|
||||||
|
databaseServiceResourceTest.createEntity(
|
||||||
|
databaseServiceResourceTest
|
||||||
|
.createRequest("RetentionTestService")
|
||||||
|
.withServiceType(CreateDatabaseService.DatabaseServiceType.Snowflake),
|
||||||
|
ADMIN_AUTH_HEADERS);
|
||||||
|
|
||||||
|
DatabaseResourceTest databaseResourceTest = new DatabaseResourceTest();
|
||||||
|
Database database =
|
||||||
|
databaseResourceTest.createEntity(
|
||||||
|
databaseResourceTest
|
||||||
|
.createRequest("retention_test_db")
|
||||||
|
.withService(databaseService.getFullyQualifiedName()),
|
||||||
|
ADMIN_AUTH_HEADERS);
|
||||||
|
|
||||||
|
DatabaseSchemaResourceTest schemaResourceTest = new DatabaseSchemaResourceTest();
|
||||||
|
DatabaseSchema schema =
|
||||||
|
schemaResourceTest.createEntity(
|
||||||
|
schemaResourceTest
|
||||||
|
.createRequest("retention_test_schema")
|
||||||
|
.withDatabase(database.getFullyQualifiedName()),
|
||||||
|
ADMIN_AUTH_HEADERS);
|
||||||
|
|
||||||
|
// Create a new table to work with
|
||||||
|
TableResourceTest tableResourceTest = new TableResourceTest();
|
||||||
|
String tableName = "retention_test_table_" + System.currentTimeMillis();
|
||||||
|
|
||||||
|
Table table =
|
||||||
|
tableResourceTest.createEntity(
|
||||||
|
tableResourceTest
|
||||||
|
.createRequest(tableName)
|
||||||
|
.withDatabaseSchema(schema.getFullyQualifiedName()),
|
||||||
|
ADMIN_AUTH_HEADERS);
|
||||||
|
|
||||||
|
// Create some change events by updating the table multiple times
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
Table updatedTable = JsonUtils.deepCopy(table, Table.class);
|
||||||
|
updatedTable.setDescription("Updated description " + i);
|
||||||
|
tableResourceTest.patchEntity(
|
||||||
|
table.getId(), JsonUtils.pojoToJson(updatedTable), updatedTable, ADMIN_AUTH_HEADERS);
|
||||||
|
table = updatedTable;
|
||||||
|
|
||||||
|
// Add a small delay between updates to ensure they're recorded as separate events
|
||||||
|
Thread.sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait a moment for change events to be processed
|
||||||
|
Thread.sleep(1000);
|
||||||
|
|
||||||
|
// Trigger the Data Retention application
|
||||||
|
postTriggerApp("DataRetentionApplication", ADMIN_AUTH_HEADERS);
|
||||||
|
|
||||||
|
// Wait for the app to complete
|
||||||
|
Thread.sleep(5000);
|
||||||
|
|
||||||
|
// Assert the app status is available after trigger
|
||||||
|
assertAppStatusAvailableAfterTrigger("DataRetentionApplication");
|
||||||
|
|
||||||
|
// Assert the app ran with SUCCESS status
|
||||||
|
assertAppRanAfterTriggerWithStatus("DataRetentionApplication", AppRunRecord.Status.SUCCESS);
|
||||||
|
|
||||||
|
// Get the latest run record to check statistics
|
||||||
|
AppRunRecord latestRun = getLatestAppRun("DataRetentionApplication", ADMIN_AUTH_HEADERS);
|
||||||
|
Assertions.assertNotNull(latestRun);
|
||||||
|
|
||||||
|
// Check whether successContext is not null
|
||||||
|
Assertions.assertNotNull(latestRun.getSuccessContext());
|
||||||
|
|
||||||
|
// Clean up - delete the test entities
|
||||||
|
tableResourceTest.deleteEntity(table.getId(), true, true, ADMIN_AUTH_HEADERS);
|
||||||
|
schemaResourceTest.deleteEntity(schema.getId(), true, true, ADMIN_AUTH_HEADERS);
|
||||||
|
databaseResourceTest.deleteEntity(database.getId(), true, true, ADMIN_AUTH_HEADERS);
|
||||||
|
databaseServiceResourceTest.deleteEntity(
|
||||||
|
databaseService.getId(), true, true, ADMIN_AUTH_HEADERS);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void validateCreatedEntity(
|
public void validateCreatedEntity(
|
||||||
App createdEntity, CreateApp request, Map<String, String> authHeaders)
|
App createdEntity, CreateApp request, Map<String, String> authHeaders)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user