mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-08 06:53:11 +00:00
* feat(Profiler): Added timeout logic in the profiler workflow * feat(Profiler): Added additional input in workflow UI * fix(Profiler): timeout text blob in UI * fix(Profiler): Return profile_results outside the context manager * Fix(Profiler): Added localizer for timeout label and message * Update ingestion/src/metadata/interfaces/sqalchemy/sqa_profiler_interface.py Co-authored-by: Nahuel <nahuel@getcollate.io> Co-authored-by: Nahuel <nahuel@getcollate.io>
This commit is contained in:
parent
7aed6d340b
commit
3856d63f3a
@ -40,6 +40,7 @@ class ProfilerInterfaceArgs(BaseModel):
|
|||||||
table_sample_precentage: Optional[Union[float, int]]
|
table_sample_precentage: Optional[Union[float, int]]
|
||||||
table_sample_query: Optional[Union[int, str]]
|
table_sample_query: Optional[Union[int, str]]
|
||||||
table_partition_config: Optional[PartitionProfilerConfig]
|
table_partition_config: Optional[PartitionProfilerConfig]
|
||||||
|
timeout_seconds: Optional[int]
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
arbitrary_types_allowed = True
|
arbitrary_types_allowed = True
|
||||||
|
|||||||
@ -39,6 +39,7 @@ from metadata.utils.connections import (
|
|||||||
create_and_bind_thread_safe_session,
|
create_and_bind_thread_safe_session,
|
||||||
get_connection,
|
get_connection,
|
||||||
)
|
)
|
||||||
|
from metadata.utils.custom_thread_pool import CustomThreadPoolExecutor
|
||||||
from metadata.utils.dispatch import valuedispatch
|
from metadata.utils.dispatch import valuedispatch
|
||||||
from metadata.utils.logger import profiler_interface_registry_logger
|
from metadata.utils.logger import profiler_interface_registry_logger
|
||||||
|
|
||||||
@ -86,6 +87,8 @@ class SQAProfilerInterface(SQAInterfaceMixin, ProfilerProtocol):
|
|||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.timeout_seconds = profiler_interface_args.timeout_seconds
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _session_factory(service_connection_config):
|
def _session_factory(service_connection_config):
|
||||||
"""Create thread safe session that will be automatically
|
"""Create thread safe session that will be automatically
|
||||||
@ -369,19 +372,23 @@ class SQAProfilerInterface(SQAInterfaceMixin, ProfilerProtocol):
|
|||||||
"""get all profiler metrics"""
|
"""get all profiler metrics"""
|
||||||
logger.info(f"Computing metrics with {self._thread_count} threads.")
|
logger.info(f"Computing metrics with {self._thread_count} threads.")
|
||||||
profile_results = {"table": dict(), "columns": defaultdict(dict)}
|
profile_results = {"table": dict(), "columns": defaultdict(dict)}
|
||||||
with concurrent.futures.ThreadPoolExecutor(
|
with CustomThreadPoolExecutor(max_workers=self._thread_count) as pool:
|
||||||
max_workers=self._thread_count
|
|
||||||
) as executor:
|
|
||||||
futures = [
|
futures = [
|
||||||
executor.submit(
|
pool.submit(
|
||||||
self.compute_metrics_in_thread,
|
self.compute_metrics_in_thread,
|
||||||
*metric_func,
|
*metric_func,
|
||||||
)
|
)
|
||||||
for metric_func in metric_funcs
|
for metric_func in metric_funcs
|
||||||
]
|
]
|
||||||
|
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in futures:
|
||||||
profile, column, metric_type = future.result()
|
if future.cancelled():
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
profile, column, metric_type = future.result(
|
||||||
|
timeout=self.timeout_seconds
|
||||||
|
)
|
||||||
if metric_type != MetricTypes.System.value and not isinstance(
|
if metric_type != MetricTypes.System.value and not isinstance(
|
||||||
profile, dict
|
profile, dict
|
||||||
):
|
):
|
||||||
@ -398,6 +405,12 @@ class SQAProfilerInterface(SQAInterfaceMixin, ProfilerProtocol):
|
|||||||
**profile,
|
**profile,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
except concurrent.futures.TimeoutError as exc:
|
||||||
|
pool.shutdown39(wait=True, cancel_futures=True)
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.error(f"Operation was cancelled due to TimeoutError - {exc}")
|
||||||
|
raise concurrent.futures.TimeoutError
|
||||||
|
|
||||||
return profile_results
|
return profile_results
|
||||||
|
|
||||||
def fetch_sample_data(self, table) -> TableData:
|
def fetch_sample_data(self, table) -> TableData:
|
||||||
|
|||||||
@ -266,6 +266,7 @@ class ProfilerWorkflow(WorkflowStatusMixin):
|
|||||||
table_partition_config=self.get_partition_details(self._table_entity)
|
table_partition_config=self.get_partition_details(self._table_entity)
|
||||||
if not self.get_profile_query(self._table_entity)
|
if not self.get_profile_query(self._table_entity)
|
||||||
else None,
|
else None,
|
||||||
|
timeout_seconds=self.source_config.timeoutSeconds,
|
||||||
)
|
)
|
||||||
if isinstance(service_connection_config, DatalakeConnection):
|
if isinstance(service_connection_config, DatalakeConnection):
|
||||||
return DataLakeProfilerInterface(self._profiler_interface_args)
|
return DataLakeProfilerInterface(self._profiler_interface_args)
|
||||||
|
|||||||
49
ingestion/src/metadata/utils/custom_thread_pool.py
Normal file
49
ingestion/src/metadata/utils/custom_thread_pool.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
Custom thread pool Executor
|
||||||
|
"""
|
||||||
|
|
||||||
|
import queue
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
|
||||||
|
class CustomThreadPoolExecutor(ThreadPoolExecutor):
|
||||||
|
"""In python 3.9 shutdown will stop the pool Executor.
|
||||||
|
We replicate it here to add support in 3.7 and 3.8
|
||||||
|
"""
|
||||||
|
|
||||||
|
def shutdown39(self, wait=True, *, cancel_futures=False):
|
||||||
|
"""replicate shutdown from 3.9
|
||||||
|
|
||||||
|
Args:
|
||||||
|
wait (bool, optional): Defaults to True.
|
||||||
|
cancel_futures (bool, optional): Defaults to False.
|
||||||
|
"""
|
||||||
|
with self._shutdown_lock:
|
||||||
|
self._shutdown = True
|
||||||
|
if cancel_futures:
|
||||||
|
# Drain all work items from the queue, and then cancel their
|
||||||
|
# associated futures.
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
work_item = self._work_queue.get_nowait()
|
||||||
|
except queue.Empty:
|
||||||
|
break
|
||||||
|
if work_item is not None:
|
||||||
|
work_item.future.cancel()
|
||||||
|
|
||||||
|
# Send a wake-up to prevent threads calling
|
||||||
|
# _work_queue.get(block=True) from permanently blocking.
|
||||||
|
self._work_queue.put(None)
|
||||||
|
if wait:
|
||||||
|
for thread_ in self._threads:
|
||||||
|
thread_.join()
|
||||||
@ -13,6 +13,7 @@
|
|||||||
Test Profiler behavior
|
Test Profiler behavior
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
from concurrent.futures import TimeoutError
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
@ -230,6 +231,28 @@ class ProfilerTest(TestCase):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_profiler_with_timeout(self):
|
||||||
|
"""check timeout is properly used"""
|
||||||
|
|
||||||
|
with patch.object(
|
||||||
|
SQAProfilerInterface, "_convert_table_to_orm_object", return_value=User
|
||||||
|
):
|
||||||
|
sqa_profiler_interface = SQAProfilerInterface(
|
||||||
|
profiler_interface_args=ProfilerInterfaceArgs(
|
||||||
|
service_connection_config=self.sqlite_conn,
|
||||||
|
table_entity=self.table_entity,
|
||||||
|
ometa_client=None,
|
||||||
|
timeout_seconds=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
simple = DefaultProfiler(
|
||||||
|
profiler_interface=sqa_profiler_interface,
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(TimeoutError):
|
||||||
|
simple.compute_metrics()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def tearDownClass(cls) -> None:
|
def tearDownClass(cls) -> None:
|
||||||
os.remove(cls.db_path)
|
os.remove(cls.db_path)
|
||||||
|
|||||||
@ -45,6 +45,11 @@
|
|||||||
"description": "Number of threads to use during metric computations",
|
"description": "Number of threads to use during metric computations",
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"default": 5
|
"default": 5
|
||||||
|
},
|
||||||
|
"timeoutSeconds": {
|
||||||
|
"description": "Profiler Timeout in Seconds",
|
||||||
|
"type": "integer",
|
||||||
|
"default": 43200
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false
|
"additionalProperties": false
|
||||||
|
|||||||
@ -197,6 +197,9 @@ const AddIngestion = ({
|
|||||||
const [threadCount, setThreadCount] = useState(
|
const [threadCount, setThreadCount] = useState(
|
||||||
(data?.sourceConfig.config as ConfigClass)?.threadCount ?? 5
|
(data?.sourceConfig.config as ConfigClass)?.threadCount ?? 5
|
||||||
);
|
);
|
||||||
|
const [timeoutSeconds, setTimeoutSeconds] = useState(
|
||||||
|
(data?.sourceConfig.config as ConfigClass)?.timeoutSeconds ?? 43200
|
||||||
|
);
|
||||||
const [dashboardFilterPattern, setDashboardFilterPattern] =
|
const [dashboardFilterPattern, setDashboardFilterPattern] =
|
||||||
useState<FilterPattern>(
|
useState<FilterPattern>(
|
||||||
(data?.sourceConfig.config as ConfigClass)?.dashboardFilterPattern ??
|
(data?.sourceConfig.config as ConfigClass)?.dashboardFilterPattern ??
|
||||||
@ -549,6 +552,7 @@ const AddIngestion = ({
|
|||||||
generateSampleData: ingestSampleData,
|
generateSampleData: ingestSampleData,
|
||||||
profileSample: profileSample,
|
profileSample: profileSample,
|
||||||
threadCount: threadCount,
|
threadCount: threadCount,
|
||||||
|
timeoutSeconds: timeoutSeconds,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -749,6 +753,7 @@ const AddIngestion = ({
|
|||||||
handleShowFilter={handleShowFilter}
|
handleShowFilter={handleShowFilter}
|
||||||
handleStageFileLocation={(val) => setStageFileLocation(val)}
|
handleStageFileLocation={(val) => setStageFileLocation(val)}
|
||||||
handleThreadCount={setThreadCount}
|
handleThreadCount={setThreadCount}
|
||||||
|
handleTimeoutSeconds={setTimeoutSeconds}
|
||||||
includeLineage={includeLineage}
|
includeLineage={includeLineage}
|
||||||
includeTags={includeTag}
|
includeTags={includeTag}
|
||||||
includeView={includeView}
|
includeView={includeView}
|
||||||
@ -775,6 +780,7 @@ const AddIngestion = ({
|
|||||||
stageFileLocation={stageFileLocation}
|
stageFileLocation={stageFileLocation}
|
||||||
tableFilterPattern={tableFilterPattern}
|
tableFilterPattern={tableFilterPattern}
|
||||||
threadCount={threadCount}
|
threadCount={threadCount}
|
||||||
|
timeoutSeconds={timeoutSeconds}
|
||||||
topicFilterPattern={topicFilterPattern}
|
topicFilterPattern={topicFilterPattern}
|
||||||
useFqnFilter={useFqnFilter}
|
useFqnFilter={useFqnFilter}
|
||||||
onCancel={handleCancelClick}
|
onCancel={handleCancelClick}
|
||||||
|
|||||||
@ -103,6 +103,8 @@ const mockConfigureIngestion: ConfigureIngestionProps = {
|
|||||||
handleDatasetServiceName: jest.fn(),
|
handleDatasetServiceName: jest.fn(),
|
||||||
threadCount: 5,
|
threadCount: 5,
|
||||||
handleThreadCount: jest.fn(),
|
handleThreadCount: jest.fn(),
|
||||||
|
timeoutSeconds: 43200,
|
||||||
|
handleTimeoutSeconds: jest.fn(),
|
||||||
useFqnFilter: false,
|
useFqnFilter: false,
|
||||||
onUseFqnFilterClick: jest.fn(),
|
onUseFqnFilterClick: jest.fn(),
|
||||||
};
|
};
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
import { isNil } from 'lodash';
|
import { isNil } from 'lodash';
|
||||||
import { EditorContentRef } from 'Models';
|
import { EditorContentRef } from 'Models';
|
||||||
import React, { Fragment, useRef, useState } from 'react';
|
import React, { Fragment, useRef, useState } from 'react';
|
||||||
|
import { useTranslation } from 'react-i18next';
|
||||||
import { FilterPatternEnum } from '../../../enums/filterPattern.enum';
|
import { FilterPatternEnum } from '../../../enums/filterPattern.enum';
|
||||||
import { FormSubmitType } from '../../../enums/form.enum';
|
import { FormSubmitType } from '../../../enums/form.enum';
|
||||||
import { ServiceCategory } from '../../../enums/service.enum';
|
import { ServiceCategory } from '../../../enums/service.enum';
|
||||||
@ -57,6 +58,7 @@ const ConfigureIngestion = ({
|
|||||||
queryLogDuration,
|
queryLogDuration,
|
||||||
stageFileLocation,
|
stageFileLocation,
|
||||||
threadCount,
|
threadCount,
|
||||||
|
timeoutSeconds,
|
||||||
resultLimit,
|
resultLimit,
|
||||||
enableDebugLog,
|
enableDebugLog,
|
||||||
profileSample,
|
profileSample,
|
||||||
@ -78,6 +80,7 @@ const ConfigureIngestion = ({
|
|||||||
handleStageFileLocation,
|
handleStageFileLocation,
|
||||||
handleResultLimit,
|
handleResultLimit,
|
||||||
handleThreadCount,
|
handleThreadCount,
|
||||||
|
handleTimeoutSeconds,
|
||||||
useFqnFilter,
|
useFqnFilter,
|
||||||
onUseFqnFilterClick,
|
onUseFqnFilterClick,
|
||||||
onCancel,
|
onCancel,
|
||||||
@ -85,6 +88,7 @@ const ConfigureIngestion = ({
|
|||||||
formType,
|
formType,
|
||||||
}: ConfigureIngestionProps) => {
|
}: ConfigureIngestionProps) => {
|
||||||
const markdownRef = useRef<EditorContentRef>();
|
const markdownRef = useRef<EditorContentRef>();
|
||||||
|
const { t } = useTranslation();
|
||||||
|
|
||||||
const getIngestSampleToggle = (label: string, desc: string) => {
|
const getIngestSampleToggle = (label: string, desc: string) => {
|
||||||
return (
|
return (
|
||||||
@ -180,6 +184,27 @@ const ConfigureIngestion = ({
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const getTimeoutSeconds = () => {
|
||||||
|
return (
|
||||||
|
<div>
|
||||||
|
<label>{t('label.profiler-timeout-seconds-label')}</label>
|
||||||
|
<p className="tw-text-grey-muted tw-mt-1 tw-mb-2 tw-text-sm">
|
||||||
|
{t('message.profiler-timeout-seconds-message')}
|
||||||
|
</p>
|
||||||
|
<input
|
||||||
|
className="tw-form-inputs tw-form-inputs-padding tw-w-24"
|
||||||
|
data-testid="timeoutSeconds"
|
||||||
|
id="timeoutSeconds"
|
||||||
|
name="timeoutSeconds"
|
||||||
|
placeholder="43200"
|
||||||
|
type="number"
|
||||||
|
value={timeoutSeconds}
|
||||||
|
onChange={(e) => handleTimeoutSeconds(parseInt(e.target.value))}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
const getDatabaseFieldToggles = () => {
|
const getDatabaseFieldToggles = () => {
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
@ -660,6 +685,8 @@ const ConfigureIngestion = ({
|
|||||||
{getSeparator('')}
|
{getSeparator('')}
|
||||||
{getThreadCount()}
|
{getThreadCount()}
|
||||||
{getSeparator('')}
|
{getSeparator('')}
|
||||||
|
{getTimeoutSeconds()}
|
||||||
|
{getSeparator('')}
|
||||||
{getIngestSampleToggle(
|
{getIngestSampleToggle(
|
||||||
'Ingest Sample Data',
|
'Ingest Sample Data',
|
||||||
'Extract sample data from each profile'
|
'Extract sample data from each profile'
|
||||||
|
|||||||
@ -90,6 +90,7 @@ export interface ConfigureIngestionProps {
|
|||||||
queryLogDuration: number;
|
queryLogDuration: number;
|
||||||
stageFileLocation: string;
|
stageFileLocation: string;
|
||||||
resultLimit: number;
|
resultLimit: number;
|
||||||
|
timeoutSeconds: number;
|
||||||
handleIngestionName: (value: string) => void;
|
handleIngestionName: (value: string) => void;
|
||||||
handleDatasetServiceName: (value: string[]) => void;
|
handleDatasetServiceName: (value: string[]) => void;
|
||||||
handleDescription?: (value: string) => void;
|
handleDescription?: (value: string) => void;
|
||||||
@ -109,6 +110,7 @@ export interface ConfigureIngestionProps {
|
|||||||
handleStageFileLocation: (value: string) => void;
|
handleStageFileLocation: (value: string) => void;
|
||||||
handleResultLimit: (value: number) => void;
|
handleResultLimit: (value: number) => void;
|
||||||
handleThreadCount: (value: number) => void;
|
handleThreadCount: (value: number) => void;
|
||||||
|
handleTimeoutSeconds: (value: number) => void;
|
||||||
onCancel: () => void;
|
onCancel: () => void;
|
||||||
onNext: () => void;
|
onNext: () => void;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -546,6 +546,7 @@
|
|||||||
"table-profile": "Table Profile",
|
"table-profile": "Table Profile",
|
||||||
"column-profile": "Column Profile",
|
"column-profile": "Column Profile",
|
||||||
"applied-advanced-search": "Applied advanced search",
|
"applied-advanced-search": "Applied advanced search",
|
||||||
|
"profiler-timeout-seconds-label": "Timeout in Seconds",
|
||||||
"add-deploy": "Add & Deploy",
|
"add-deploy": "Add & Deploy",
|
||||||
"dbt-Configuration-source": "DBT Configuration Source",
|
"dbt-Configuration-source": "DBT Configuration Source",
|
||||||
"select-dbt-source": "Select DBT Source",
|
"select-dbt-source": "Select DBT Source",
|
||||||
@ -669,6 +670,7 @@
|
|||||||
"announcement-created-successfully": "Announcement created successfully!",
|
"announcement-created-successfully": "Announcement created successfully!",
|
||||||
"no-profiler-message": "Data Profiler is an optional configuration in Ingestion. Please enable the data profiler by following the documentation",
|
"no-profiler-message": "Data Profiler is an optional configuration in Ingestion. Please enable the data profiler by following the documentation",
|
||||||
"advanced-search-message": "Discover the right data assets using the syntax editor with and/or conditions.",
|
"advanced-search-message": "Discover the right data assets using the syntax editor with and/or conditions.",
|
||||||
|
"profiler-timeout-seconds-message": "Optional number setting the timeout in seconds for the profiler. If the timeout is reached the profiler will wait for any pending queries to terminated its execution.",
|
||||||
"instance-identifier": "Name that identifies this configuration instance uniquely.",
|
"instance-identifier": "Name that identifies this configuration instance uniquely.",
|
||||||
"fetch-dbt-files": "Available sources to fetch DBT catalog and manifest files."
|
"fetch-dbt-files": "Available sources to fetch DBT catalog and manifest files."
|
||||||
},
|
},
|
||||||
|
|||||||
@ -426,7 +426,8 @@
|
|||||||
"updated-by": "Mis à jour par",
|
"updated-by": "Mis à jour par",
|
||||||
"on-lowercase": "sur",
|
"on-lowercase": "sur",
|
||||||
"webhook-display-text": "Webhook {{displayText}}",
|
"webhook-display-text": "Webhook {{displayText}}",
|
||||||
"endpoint": "Point de Terminaison"
|
"endpoint": "Point de Terminaison",
|
||||||
|
"profiler-timeout-seconds-label": "Délai d'Attente en Secondes"
|
||||||
},
|
},
|
||||||
"message": {
|
"message": {
|
||||||
"service-email-required": "Un Email pour le Service account est requis",
|
"service-email-required": "Un Email pour le Service account est requis",
|
||||||
@ -519,7 +520,8 @@
|
|||||||
"field-text-is-required": "{{fieldText}} est requis.",
|
"field-text-is-required": "{{fieldText}} est requis.",
|
||||||
"entity-owned-by-name": "Cette Resource appartient à {{entityOwner}}",
|
"entity-owned-by-name": "Cette Resource appartient à {{entityOwner}}",
|
||||||
"and-followed-owned-by-name": "et l'équipe suivante appartient à {{userName}}",
|
"and-followed-owned-by-name": "et l'équipe suivante appartient à {{userName}}",
|
||||||
"field-text-is-invalid": "{{fieldText}} est invalide."
|
"field-text-is-invalid": "{{fieldText}} est invalide.",
|
||||||
|
"profiler-timeout-seconds-message": "Le délai d'attente en seconde est optionel pour le profilage. Si le délai d'attente est atteint le profilage attendra la fin d'éxécution des requêtes qui on débutées pour terminer son éxécution."
|
||||||
},
|
},
|
||||||
"server": {
|
"server": {
|
||||||
"no-followed-entities": "Vous ne suivez aucunne resources pour le moment.",
|
"no-followed-entities": "Vous ne suivez aucunne resources pour le moment.",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user