mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-07 23:04:32 +00:00

* Prepare the skeleton for generic app registration * Prepare the skeleton for generic app registration * Handle app runner * Prepare the skeleton for generic app registration * Prepare the skeleton for generic app registration * Allow deployment * Fix PII APP * Fix lint * Fix PII APP * Fix PII APP * Prepare config-based external apps * Prepare config-based external apps * Fix lint * Prepare config-based external apps * Fix DI errors * Amend comments
79 lines
3.1 KiB
Python
79 lines
3.1 KiB
Python
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Workflow definition for the data insight
|
|
"""
|
|
from metadata.data_insight.processor.kpi.kpi_runner import KpiRunner
|
|
from metadata.data_insight.source.metadata import DataInsightSource
|
|
from metadata.generated.schema.analytics.basic import WebAnalyticEventType
|
|
from metadata.generated.schema.tests.testSuite import ServiceType
|
|
from metadata.ingestion.api.steps import Sink
|
|
from metadata.utils.importer import import_sink_class
|
|
from metadata.utils.logger import data_insight_logger
|
|
from metadata.utils.time_utils import get_beginning_of_day_timestamp_mill
|
|
from metadata.workflow.ingestion import IngestionWorkflow
|
|
|
|
# Module-level logger obtained from `data_insight_logger()`; used by the workflow class below.
logger = data_insight_logger()
|
|
|
|
|
|
class DataInsightWorkflow(IngestionWorkflow):
    """Data insight ingestion workflow implementation.

    On top of the parent ``IngestionWorkflow`` execution, this workflow:

    1. runs the KPI processor once the source has been executed, pushing
       every KPI result through the sink, and
    2. deletes web analytics events older than ``retention_days``.
    """

    # Number of days of web analytics events to keep; older events are
    # removed by `_clean_up_web_analytics_events` after each execution.
    retention_days = 7

    def _retrieve_service_connection_if_needed(self, service_type: ServiceType) -> None:
        """No service connection needed for data insight"""
        return None

    def _run_kpi_processor(self) -> None:
        """Run kpi processor. It will run as a step but as a post execution.

        We need the source to be executed first to compute the KPI results.
        """
        kpi_runner = KpiRunner(self.metadata)

        for kpi_result in kpi_runner.run():
            # Pick up the sink: `set_steps` registers it as the only step
            self.steps[0].run(kpi_result)

    def _clean_up_web_analytics_events(self) -> None:
        """
        We will delete web analytics events older than `cls.retention_days`
        to prevent storage explosion.

        TODO: deprecate to implement it with backend workflows
        """
        tmsp = get_beginning_of_day_timestamp_mill(days=self.retention_days)
        # Apply the same cut-off timestamp to every known event type
        for web_analytic_event in WebAnalyticEventType:
            self.metadata.delete_web_analytic_event_before_ts_exclusive(
                web_analytic_event,
                tmsp,
            )

    def _get_sink(self) -> Sink:
        """Retrieve sink for data insight workflow.

        The sink type is fixed to ``metadata-rest`` and configured with the
        host/port of the metadata connection.

        Returns:
            The sink instance created by the imported sink class.
        """
        sink_type = "metadata-rest"
        sink_class = import_sink_class(sink_type=sink_type)
        sink_config = {"api_endpoint": self.metadata_config.hostPort}
        sink: Sink = sink_class.create(sink_config, self.metadata)
        # Log the sink type that was actually instantiated. Reading it from
        # `self.config.sink` could report a different type and requires a
        # configured sink that this method otherwise never uses.
        logger.debug(f"Sink type:{sink_type}, {sink_class} configured")

        return sink

    def execute_internal(self):
        """Use parent logic and add step to process KPIs"""
        super().execute_internal()
        # Post-execution steps: both rely on the source having already run
        self._run_kpi_processor()
        self._clean_up_web_analytics_events()

    def set_steps(self):
        """Create the data insight source and register the sink as the only step."""
        self.source = DataInsightSource.create(self.metadata)  # type: ignore

        sink = self._get_sink()
        self.steps = (sink,)
|