From 44df02010aecdbc64a0f90ad5b5ba764c3b96742 Mon Sep 17 00:00:00 2001 From: Onkar Ravgan Date: Thu, 5 Oct 2023 17:57:23 +0530 Subject: [PATCH] Added delete API for Raw Cost Analysis Report Rows (#13435) * Added delete API * review comments * fixed checkstyle * fixed naming * checkstyle --------- Co-authored-by: Ayush Shah --- .../cost_analysis_report_data_processor.py | 10 +++++++ .../metadata/data_insight/source/metadata.py | 2 +- .../ometa/mixins/data_insight_mixin.py | 12 ++++++++- .../source/database/greenplum/metadata.py | 4 +-- .../service/jdbi3/CollectionDAO.java | 3 +++ .../service/jdbi3/ReportDataRepository.java | 12 +++++++++ .../analytics/ReportDataResource.java | 27 +++++++++++++++++++ .../resources/elasticsearch/indexMapping.json | 2 +- 8 files changed, 67 insertions(+), 5 deletions(-) diff --git a/ingestion/src/metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py b/ingestion/src/metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py index e6bed7a4306..42fdab499bc 100644 --- a/ingestion/src/metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py +++ b/ingestion/src/metadata/data_insight/processor/reports/cost_analysis_report_data_processor.py @@ -60,6 +60,16 @@ class RawCostAnalysisReportDataProcessor(DataProcessor): _data_processor_type = ReportDataType.RawCostAnalysisReportData.value + def __init__(self, metadata: OpenMetadata): + super().__init__(metadata) + self.pre_hook = self._pre_hook_fn + + def _pre_hook_fn(self): + """ + Method to delete the previous rows of the RawCostAnalysisReportData type report + """ + self.metadata.delete_report_data(ReportDataType.RawCostAnalysisReportData) + def yield_refined_data(self) -> Iterable[ReportData]: """yield refined data""" for _, value in self._refined_data.items(): diff --git a/ingestion/src/metadata/data_insight/source/metadata.py b/ingestion/src/metadata/data_insight/source/metadata.py index 76014f71f39..c9ad634360e 100644 --- a/ingestion/src/metadata/data_insight/source/metadata.py +++ b/ingestion/src/metadata/data_insight/source/metadata.py @@ -114,7 +114,7 @@ class DataInsightSource(Source): for report_data_type in ReportDataType: logger.info(f"Processing data for report type {report_data_type}") try: - self.metadata.delete_report_data(report_data_type, self.date) + self.metadata.delete_report_data_at_date(report_data_type, self.date) producer = producer_factory.create( report_data_type.value, self.metadata ) diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/data_insight_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/data_insight_mixin.py index 09a52f46a91..aba0932afd5 100644 --- a/ingestion/src/metadata/ingestion/ometa/mixins/data_insight_mixin.py +++ b/ingestion/src/metadata/ingestion/ometa/mixins/data_insight_mixin.py @@ -175,7 +175,9 @@ class DataInsightMixin: event_type_value = event_type.value self.client.delete(f"/analytics/web/events/{event_type_value}/{tmsp}/collect") - def delete_report_data(self, report_data_type: ReportDataType, date: str) -> None: + def delete_report_data_at_date( + self, report_data_type: ReportDataType, date: str + ) -> None: """Delete report data at a specific date for a specific report data type Args: @@ -185,3 +187,11 @@ class DataInsightMixin: self.client.delete( f"/analytics/dataInsights/data/{report_data_type.value}/{date}" ) + + def delete_report_data(self, report_data_type: ReportDataType) -> None: + """Delete report data for a specific report data type + + Args: + report_data_type (ReportDataType): report date type to delete + """ + self.client.delete(f"/analytics/dataInsights/data/{report_data_type.value}") diff --git a/ingestion/src/metadata/ingestion/source/database/greenplum/metadata.py b/ingestion/src/metadata/ingestion/source/database/greenplum/metadata.py index f7ef77e68aa..bcc15a4b88f 100644 --- a/ingestion/src/metadata/ingestion/source/database/greenplum/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/greenplum/metadata.py @@ -116,14 +116,14 @@ class GreenplumSource(CommonDbSourceService): """ @classmethod - def create(cls, config_dict, metadata_config: OpenMetadataConnection): + def create(cls, config_dict, metadata: OpenMetadataConnection): config: WorkflowSource = WorkflowSource.parse_obj(config_dict) connection: GreenplumConnection = config.serviceConnection.__root__.config if not isinstance(connection, GreenplumConnection): raise InvalidSourceException( f"Expected GreenplumConnection, but got {connection}" ) - return cls(config, metadata_config) + return cls(config, metadata) def query_table_names_and_types( self, schema_name: str diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java index 040aab17d2d..b23c659516a 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java @@ -3308,6 +3308,9 @@ public interface CollectionDAO { "DELETE FROM report_data_time_series WHERE entityFQNHash = :reportDataType and DATE(TO_TIMESTAMP((json ->> 'timestamp')::bigint/1000)) = DATE(:date)", connectionType = POSTGRES) void deleteReportDataTypeAtDate(@BindFQN("reportDataType") String reportDataType, @Bind("date") String date); + + @SqlUpdate("DELETE FROM report_data_time_series WHERE entityFQNHash = :reportDataType") + void deletePreviousReportData(@BindFQN("reportDataType") String reportDataType); } interface ProfilerDataTimeSeriesDAO extends EntityTimeSeriesDAO { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ReportDataRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ReportDataRepository.java index d706bc25027..807ecb5cc19 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ReportDataRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ReportDataRepository.java @@ -39,10 +39,22 @@ public class ReportDataRepository extends EntityTimeSeriesRepository cleanUpIndex(reportDataType, date); } + public void deleteReportData(ReportDataType reportDataType) { + ((CollectionDAO.ReportDataTimeSeriesDAO) timeSeriesDao).deletePreviousReportData(reportDataType.value()); + cleanUpPreviousIndex(reportDataType); + } + private void cleanUpIndex(ReportDataType reportDataType, String date) { HashMap params = new HashMap<>(); params.put("date_", date); String scriptTxt = "doc['timestamp'].value.toLocalDate() == LocalDate.parse(params.date_);"; searchRepository.deleteByScript(reportDataType.toString(), scriptTxt, params); } + + private void cleanUpPreviousIndex(ReportDataType reportDataType) { + HashMap params = new HashMap<>(); + params.put("reportDataType_", reportDataType.value()); + String scriptTxt = "doc['reportDataType'].value == params.reportDataType_"; + searchRepository.deleteByScript(reportDataType.toString(), scriptTxt, params); + } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/analytics/ReportDataResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/analytics/ReportDataResource.java index 1dfab61359d..309abb09872 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/analytics/ReportDataResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/analytics/ReportDataResource.java @@ -143,4 +143,31 @@ public class ReportDataResource extends EntityTimeSeriesResource