OpenMetadata/bootstrap/sql/migrations/native/1.1.3/mysql/postDataMigrationSQLScript.sql
Teddy ab3042e8ee
Issue 12297 bis -- Delete Insert logic in the DI workflow (#13058)
* fix: updated ingestion to delete existing data for the ingestion day

* fix: added delete endpoint for the report data

* fix: added migration to delete duplicate data + added tables creation & data deltion logic in a transaction

* fix: made deletion SQL engine aware

* fix: added ES deletion logic back while we work on DI event publisher
2023-09-01 18:01:20 +02:00

50 lines
1.4 KiB
SQL

START TRANSACTION;
-- We'll rank all the runs (timestamps) for every day, and delete all the data but the most recent one.
DELETE FROM report_data_time_series WHERE JSON_EXTRACT(json, '$.id') IN (
select ids FROM (
SELECT
(json ->> '$.id') AS ids,
DENSE_RANK() OVER(PARTITION BY `date` ORDER BY `timestamp` DESC) as denseRank
FROM (
SELECT
*
FROM report_data_time_series rdts
WHERE json ->> '$.reportDataType' = 'WebAnalyticEntityViewReportData'
) duplicates
ORDER BY `date` DESC, `timestamp` DESC
) as dense_ranked
WHERE denseRank != 1
);
DELETE FROM report_data_time_series WHERE JSON_EXTRACT(json, '$.id') IN (
select ids FROM (
SELECT
(json ->> '$.id') AS ids,
DENSE_RANK() OVER(PARTITION BY `date` ORDER BY `timestamp` DESC) as denseRank
FROM (
SELECT
*
FROM report_data_time_series rdts
WHERE json ->> '$.reportDataType' = 'EntityReportData'
) duplicates
ORDER BY `date` DESC, `timestamp` DESC
) as dense_ranked
WHERE denseRank != 1
);
DELETE FROM report_data_time_series WHERE JSON_EXTRACT(json, '$.id') IN (
select ids FROM (
SELECT
(json ->> '$.id') AS ids,
DENSE_RANK() OVER(PARTITION BY `date` ORDER BY `timestamp` DESC) as denseRank
FROM (
SELECT
*
FROM report_data_time_series rdts
WHERE json ->> '$.reportDataType' = 'WebAnalyticUserActivityReportData'
) duplicates
ORDER BY `date` DESC, `timestamp` DESC
) as dense_ranked
WHERE denseRank != 1
);
COMMIT;