mirror of https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-16 11:18:33 +00:00
parent 7292695bd3
commit bd4071bd64
@@ -35,42 +35,3 @@ class DynamicTypedConfig(ConfigModel):
 class WorkflowExecutionError(Exception):
     """An error occurred when executing the workflow"""
-
-
-class IncludeFilterPattern(ConfigModel):
-    """A class to store allow deny regexes"""
-
-    includes: List[str] = [".*"]
-    excludes: List[str] = []
-    alphabet: str = "[A-Za-z0-9 _.-]"
-
-    @property
-    def alphabet_pattern(self):
-        return re.compile(f"^{self.alphabet}+$")
-
-    @classmethod
-    def allow_all(cls):
-        return IncludeFilterPattern()
-
-    def included(self, string: str) -> bool:
-        try:
-            for exclude in self.excludes:
-                if re.match(exclude, string):
-                    return False
-
-            for include in self.includes:
-                if re.match(include, string):
-                    return True
-            return False
-        except Exception as err:
-            raise Exception("Regex Error: {}".format(err))
-
-    def is_fully_specified_include_list(self) -> bool:
-        for filter_pattern in self.includes:
-            if not self.alphabet_pattern.match(filter_pattern):
-                return False
-        return True
-
-    def get_allowed_list(self):
-        assert self.is_fully_specified_include_list()
-        return [a for a in self.includes if self.included(a)]
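
A minimal usage sketch of the removed class, assuming the pre-commit metadata.config.common import path: excludes were checked first, so an exclude always won over an include.

from metadata.config.common import IncludeFilterPattern  # pre-commit location

pattern = IncludeFilterPattern(includes=["my_.*"], excludes=["my_tmp.*"])
assert pattern.included("my_table")          # hits an include, no exclude matches
assert not pattern.included("my_tmp_stage")  # the exclude wins, even though an include also matches
assert not pattern.included("other")         # matches neither list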
@@ -16,6 +16,7 @@ import logging
 import re
 import traceback
 import uuid
+from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Dict, Iterable, List, Optional, Tuple
@@ -56,7 +57,6 @@ from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
 from metadata.ingestion.models.table_metadata import DeleteTable
 from metadata.ingestion.ometa.client import APIError
 from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.ingestion.source.sql_source_common import SQLSourceStatus
 from metadata.orm_profiler.orm.converter import ometa_to_orm
 from metadata.orm_profiler.profiler.default import DefaultProfiler
 from metadata.utils.column_type_parser import ColumnTypeParser
@@ -66,6 +66,26 @@ from metadata.utils.helpers import get_database_service_or_create, ingest_lineag
 logger: logging.Logger = logging.getLogger(__name__)
 
 
+@dataclass
+class SQLSourceStatus(SourceStatus):
+    """
+    Reports the source status after ingestion
+    """
+
+    success: List[str] = field(default_factory=list)
+    failures: List[str] = field(default_factory=list)
+    warnings: List[str] = field(default_factory=list)
+    filtered: List[str] = field(default_factory=list)
+
+    def scanned(self, record: str) -> None:
+        self.success.append(record)
+        logger.info(f"Table Scanned: {record}")
+
+    def filter(self, record: str, err: str) -> None:
+        self.filtered.append(record)
+        logger.warning(f"Filtered Table {record} due to {err}")
+
+
 def _get_table_description(schema: str, table: str, inspector: Inspector) -> str:
     description = None
     try:
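
A quick usage sketch of the relocated status object; the import path follows the move above, and the table names are hypothetical:

from metadata.ingestion.source.sql_source import SQLSourceStatus

status = SQLSourceStatus()
status.scanned("shop.public.customers")  # appended to status.success, logged at INFO
status.filter("shop.public.tmp_load", "Table pattern not allowed")  # appended to status.filtered, logged at WARNING
assert status.success == ["shop.public.customers"]
assert status.filtered == ["shop.public.tmp_load"]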
@@ -105,7 +125,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
         self.service = get_database_service_or_create(config, metadata_config)
         self.metadata = OpenMetadata(metadata_config)
         self.status = SQLSourceStatus()
-        self.engine = get_engine(config=self.config)
+        self.engine = get_engine(workflow_source=self.config)
         self._session = None  # We will instantiate this just if needed
         self.connection = self.engine.connect()
         self.data_profiler = None
@@ -1,100 +0,0 @@
-import logging
-from abc import abstractmethod
-from dataclasses import dataclass, field
-from typing import List, Optional
-from urllib.parse import quote_plus
-
-from pydantic import SecretStr
-
-from metadata.generated.schema.entity.services.databaseService import (
-    DatabaseServiceType,
-)
-from metadata.generated.schema.operations.pipelines.databaseServiceMetadataPipeline import (
-    DatabaseServiceMetadataPipeline,
-)
-from metadata.ingestion.api.source import SourceStatus
-
-logger: logging.Logger = logging.getLogger(__name__)
-
-
-@dataclass
-class SQLSourceStatus(SourceStatus):
-    """
-    Reports the source status after ingestion
-    """
-
-    success: List[str] = field(default_factory=list)
-    failures: List[str] = field(default_factory=list)
-    warnings: List[str] = field(default_factory=list)
-    filtered: List[str] = field(default_factory=list)
-
-    def scanned(self, record: str) -> None:
-        self.success.append(record)
-        logger.info(f"Table Scanned: {record}")
-
-    def filter(self, record: str, err: str) -> None:
-        self.filtered.append(record)
-        logger.warning(f"Filtered Table {record} due to {err}")
-
-
-def build_sql_source_connection_url(
-    host_port: str,
-    scheme: str,
-    username: Optional[str] = None,
-    password: Optional[SecretStr] = None,
-    database: Optional[str] = None,
-    options: Optional[dict] = None,
-) -> str:
-    """
-    Helper function to prepare the db URL
-    """
-
-    url = f"{scheme}://"
-    if username is not None:
-        url += f"{username}"
-        if password is not None:
-            url += f":{quote_plus(password.get_secret_value())}"
-        url += "@"
-    url += f"{host_port}"
-    if database:
-        url += f"/{database}"
-
-    if options is not None:
-        if database is None:
-            url += "/"
-        params = "&".join(
-            f"{key}={quote_plus(value)}" for (key, value) in options.items() if value
-        )
-        url = f"{url}?{params}"
-    return url
-
-
-class SQLConnectionConfig(DatabaseServiceMetadataPipeline):
-    """
-    Config class containing all supported
-    configurations for an SQL source, including
-    data profiling and DBT generated information.
-    """
-
-    service_name: str
-    db_schema: Optional[str] = None
-    options: dict = {}
-    connect_args: dict = {}
-    include_tables: Optional[bool] = True
-
-    @abstractmethod
-    def get_connection_url(self):
-        return build_sql_source_connection_url(
-            host_port=self.hostPort,
-            scheme=self.scheme,
-            username=self.username,
-            password=self.password,
-            database=self.database,
-            options=self.options,
-        )
-
-    def get_service_type(self) -> DatabaseServiceType:
-        return DatabaseServiceType[self.type]
-
-    def get_service_name(self) -> str:
-        return self.service_name
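
For reference, the deleted helper assembled a standard SQLAlchemy URL; a sketch of its output with hypothetical connection values, calling the function as defined above (note the quote_plus escaping of the password):

from pydantic import SecretStr

url = build_sql_source_connection_url(
    host_port="localhost:5432",
    scheme="postgresql+psycopg2",
    username="ingest",
    password=SecretStr("s3cr3t!"),
    database="shop",
    options={"sslmode": "require"},
)
assert url == "postgresql+psycopg2://ingest:s3cr3t%21@localhost:5432/shop?sslmode=require"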
@@ -26,6 +26,9 @@ from metadata.config.common import WorkflowExecutionError
 from metadata.config.workflow import get_ingestion_source, get_processor, get_sink
 from metadata.generated.schema.entity.data.database import Database
 from metadata.generated.schema.entity.data.table import Table
+from metadata.generated.schema.metadataIngestion.databaseServiceMetadataPipeline import (
+    DatabaseServiceMetadataPipeline,
+)
 from metadata.generated.schema.metadataIngestion.workflow import (
     OpenMetadataServerConfig,
     OpenMetadataWorkflowConfig,
@@ -34,14 +37,11 @@ from metadata.ingestion.api.processor import Processor
 from metadata.ingestion.api.sink import Sink
 from metadata.ingestion.api.source import Source
 from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.ingestion.source.sql_source import SQLSource
-from metadata.ingestion.source.sql_source_common import (
-    SQLConnectionConfig,
-    SQLSourceStatus,
-)
+from metadata.ingestion.source.sql_source import SQLSource, SQLSourceStatus
 from metadata.orm_profiler.api.models import ProfilerProcessorConfig, ProfilerResponse
 from metadata.orm_profiler.utils import logger
-from metadata.utils.engines import create_and_bind_session, get_engine
+from metadata.utils.engines import create_and_bind_session
+from metadata.utils.filters import filter_by_schema, filter_by_table
 
 logger = logger()
@@ -64,7 +64,7 @@ class ProfilerWorkflow:
         )
 
         # We will use the existing sources to build the Engine
-        self.source = get_ingestion_source(
+        self.source: Source = get_ingestion_source(
             source_type=self.config.source.type,
             source_config=self.config.source,
             metadata_config=self.metadata_config,
@@ -76,7 +76,9 @@ class ProfilerWorkflow:
         )
 
         # Init and type the source config
-        self.source_config: SQLConnectionConfig = self.source.config
+        self.source_config: DatabaseServiceMetadataPipeline = (
+            self.config.source.sourceConfig.config
+        )
         self.source_status = SQLSourceStatus()
 
         self.processor = get_processor(
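
Path sketch with a hypothetical parsed workflow instance: the profiler now reads its typed config from the pipeline's sourceConfig block rather than from the source connection itself:

source_config: DatabaseServiceMetadataPipeline = workflow_config.source.sourceConfig.config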
@@ -85,7 +87,7 @@ class ProfilerWorkflow:
             metadata_config=self.metadata_config,
             _from="orm_profiler",
             # Pass the session as kwargs for the profiler
-            session=create_and_bind_session(get_engine(self.source_config)),
+            session=create_and_bind_session(self.source.engine),
         )
 
         if self.config.sink:
@@ -121,20 +123,22 @@ class ProfilerWorkflow:
         for table in tables:
 
             # Validate schema
-            if not self.source_config.schema_filter_pattern.included(
-                table.database.name
+            if filter_by_schema(
+                schema_filter_pattern=self.source_config.schemaFilterPattern,
+                schema_name=table.databaseSchema.name,
             ):
                 self.source_status.filter(
-                    table.database.name, "Schema pattern not allowed"
+                    table.databaseSchema.name, "Schema pattern not allowed"
                 )
                 continue
 
             # Validate database
-            if not self.source_config.table_filter_pattern.included(
-                str(table.name.__root__)
+            if filter_by_table(
+                table_filter_pattern=self.source_config.tableFilterPattern,
+                table_name=str(table.name.__root__),
             ):
                 self.source_status.filter(
-                    table.fullyQualifiedName.__root__, "Table name pattern not allowed"
+                    table.name.__root__, "Table name pattern not allowed"
                 )
                 continue
 
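
Note the inverted predicate: the old pattern answered "is this name allowed?", while the new helpers answer "should this entity be dropped?". A minimal equivalence sketch (keep() is a hypothetical wrapper, not project code):

from metadata.utils.filters import filter_by_table

def keep(pattern, name: str) -> bool:
    # True from filter_by_table means "filter it out", so keeping is the negation
    return not filter_by_table(table_filter_pattern=pattern, table_name=name)

assert keep(None, "any_table")  # no pattern configured -> nothing is filtered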
@@ -27,12 +27,12 @@ from metadata.utils.source_connections import get_connection_url
 logger = logging.getLogger("Utils")
 
 
-def get_engine(config: WorkflowSource, verbose: bool = False) -> Engine:
+def get_engine(workflow_source: WorkflowSource, verbose: bool = False) -> Engine:
     """
     Given an SQL configuration, build the SQLAlchemy Engine
     """
-    logger.info(f"Building Engine for {config.serviceName}...")
-    service_connection_config = config.serviceConnection.__root__.config
+    logger.info(f"Building Engine for {workflow_source.serviceName}...")
+    service_connection_config = workflow_source.serviceConnection.__root__.config
     options = service_connection_config.connectionOptions
     if not options:
         options = {}
@@ -40,7 +40,7 @@ def get_engine(config: WorkflowSource, verbose: bool = False) -> Engine:
     if not connect_args:
         connect_args = {}
     engine = create_engine(
-        get_connection_url(config.serviceConnection.__root__.config),
+        get_connection_url(service_connection_config),
         **options,
         connect_args=connect_args,
         echo=verbose,
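
Downstream call sketch after the rename, assuming workflow_source is an already-parsed WorkflowSource instance:

from metadata.utils.engines import create_and_bind_session, get_engine

engine = get_engine(workflow_source=workflow_source, verbose=False)
session = create_and_bind_session(engine)  # sessions are built from the shared engine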
ingestion/src/metadata/utils/filters.py (new file, +103)
@@ -0,0 +1,103 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Helper that implements table and filter pattern logic
+"""
+import re
+from typing import List, Optional
+
+from metadata.generated.schema.type.filterPattern import FilterPatternModel
+
+
+class InvalidPatternException(Exception):
+    """
+    Raised when an invalid pattern is configured in the workflow
+    """
+
+
+def validate_regex(regex_list: List[str]) -> None:
+    """
+    Check that the given include/exclude regexes
+    are well formatted
+    """
+    for regex in regex_list:
+        try:
+            re.compile(regex)
+        except re.error:
+            raise InvalidPatternException(f"Invalid regex {regex}.")
+
+
+def _filter(filter_pattern: Optional[FilterPatternModel], name: str) -> bool:
+    """
+    Return True if the name needs to be filtered, False otherwise
+
+    Include takes precedence over exclude
+
+    :param filter_pattern: Model defining filtering logic
+    :param name: table or schema name
+    :return: True for filtering, False otherwise
+    """
+    if not filter_pattern:
+        # No filter pattern, nothing to filter
+        return False
+
+    if filter_pattern.includes:
+        validate_regex(filter_pattern.includes)
+        return not any(
+            [
+                matched
+                for regex in filter_pattern.includes
+                if (matched := re.match(regex, name))
+            ]
+        )
+
+    if filter_pattern.excludes:
+        validate_regex(filter_pattern.excludes)
+        return any(
+            [
+                matched
+                for regex in filter_pattern.excludes
+                if (matched := re.match(regex, name))
+            ]
+        )
+
+    return False
+
+
+def filter_by_schema(
+    schema_filter_pattern: Optional[FilterPatternModel], schema_name: str
+) -> bool:
+    """
+    Return True if the schema needs to be filtered, False otherwise
+
+    Include takes precedence over exclude
+
+    :param schema_filter_pattern: Model defining schema filtering logic
+    :param schema_name: table schema name
+    :return: True for filtering, False otherwise
+    """
+    return _filter(schema_filter_pattern, schema_name)
+
+
+def filter_by_table(
+    table_filter_pattern: Optional[FilterPatternModel], table_name: str
+) -> bool:
+    """
+    Return True if the table needs to be filtered, False otherwise
+
+    Include takes precedence over exclude
+
+    :param table_filter_pattern: Model defining table filtering logic
+    :param table_name: table name
+    :return: True for filtering, False otherwise
+    """
+    return _filter(table_filter_pattern, table_name)
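
Behavior sketch for the new helpers, assuming FilterPatternModel accepts the includes/excludes fields read above. The includes branch returns before excludes is ever consulted, which is what "include takes precedence over exclude" means here:

from metadata.generated.schema.type.filterPattern import FilterPatternModel
from metadata.utils.filters import filter_by_table

pattern = FilterPatternModel(includes=["users.*"], excludes=["users_archive"])

assert filter_by_table(pattern, "users_archive") is False  # an include matched; excludes never checked
assert filter_by_table(pattern, "orders") is True          # no include matched -> filtered out
assert filter_by_table(None, "anything") is False          # no pattern -> nothing filtered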
@@ -13,7 +13,6 @@ import logging
 import traceback
 from datetime import datetime, timedelta
 from typing import Any, Dict, Iterable
-from urllib.parse import quote_plus
 
 from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
 from metadata.generated.schema.api.services.createDashboardService import (
@@ -21,11 +21,11 @@ from sqlalchemy.orm import declarative_base
 from metadata.generated.schema.api.tests.createColumnTest import CreateColumnTestRequest
 from metadata.generated.schema.api.tests.createTableTest import CreateTableTestRequest
 from metadata.generated.schema.entity.data.table import Column, DataType, Table
-from metadata.generated.schema.metadataIngestion.workflow import (
-    OpenMetadataServerConfig,
+from metadata.generated.schema.metadataIngestion.databaseServiceMetadataPipeline import (
+    DatabaseServiceMetadataPipeline,
 )
 from metadata.generated.schema.metadataIngestion.workflow import (
-    Source as WorkflowSource,
+    OpenMetadataServerConfig,
 )
 from metadata.generated.schema.tests.column.columnValuesToBeBetween import (
     ColumnValuesToBeBetween,
@@ -46,10 +46,8 @@ config = {
     "source": {
         "type": "sqlite",
         "serviceName": "my_service",
-        "serviceConnection": {
-            "config": {"type": "SQLite", "hostPort": "", "database": ":memory:"}
-        },
-        "sourceConfig": {},
+        "serviceConnection": {"config": {"type": "SQLite"}},
+        "sourceConfig": {"config": {}},
     },
     "processor": {"type": "orm-profiler", "config": {}},
     "sink": {"type": "metadata-rest", "config": {}},
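
Shape note: serviceConnection is now minimal and sourceConfig nests its own config object, which is also where the filter patterns used further down are injected. A hypothetical combined example:

source = {
    "type": "sqlite",
    "serviceName": "my_service",
    "serviceConnection": {"config": {"type": "SQLite"}},
    "sourceConfig": {"config": {"schemaFilterPattern": {"excludes": ["one_schema"]}}},
}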
@@ -68,7 +66,7 @@ def test_init_workflow():
     """
     We can initialise the workflow from a config
     """
-    assert isinstance(workflow.source_config, WorkflowSource)
+    assert isinstance(workflow.source_config, DatabaseServiceMetadataPipeline)
     assert isinstance(workflow.metadata_config, OpenMetadataServerConfig)
 
     assert isinstance(workflow.processor, OrmProfilerProcessor)
@@ -83,29 +81,33 @@ def test_filter_entities():
     """
 
     service_name = "service"
-    db_reference1 = EntityReference(id=uuid.uuid4(), name="one_db", type="database")
-    db_reference2 = EntityReference(id=uuid.uuid4(), name="another_db", type="database")
+    schema_reference1 = EntityReference(
+        id=uuid.uuid4(), name="one_schema", type="databaseSchema"
+    )
+    schema_reference2 = EntityReference(
+        id=uuid.uuid4(), name="another_schema", type="databaseSchema"
+    )
 
     all_tables = [
         Table(
             id=uuid.uuid4(),
             name="table1",
-            database=db_reference1,
-            fullyQualifiedName=f"{service_name}.{db_reference1.name}.table1",
+            databaseSchema=schema_reference1,
+            fullyQualifiedName=f"{service_name}.db.{schema_reference1.name}.table1",
             columns=[Column(name="id", dataType=DataType.BIGINT)],
         ),
         Table(
             id=uuid.uuid4(),
             name="table2",
-            database=db_reference1,
-            fullyQualifiedName=f"{service_name}.{db_reference1.name}.table2",
+            databaseSchema=schema_reference1,
+            fullyQualifiedName=f"{service_name}.db.{schema_reference1.name}.table2",
             columns=[Column(name="id", dataType=DataType.BIGINT)],
         ),
         Table(
             id=uuid.uuid4(),
             name="table3",
-            database=db_reference2,
-            fullyQualifiedName=f"{service_name}.{db_reference2.name}.table3",
+            databaseSchema=schema_reference2,
+            fullyQualifiedName=f"{service_name}.db.{schema_reference2.name}.table3",
             columns=[Column(name="id", dataType=DataType.BIGINT)],
         ),
     ]
@@ -115,9 +117,9 @@ def test_filter_entities():
 
     # We can exclude based on the schema name
     exclude_filter_schema_config = deepcopy(config)
-    exclude_filter_schema_config["source"]["config"]["schema_filter_pattern"] = {
-        "excludes": ["one_db"]
-    }
+    exclude_filter_schema_config["source"]["sourceConfig"]["config"][
+        "schemaFilterPattern"
+    ] = {"excludes": ["one_schema"]}
 
     exclude_filter_schema_workflow = ProfilerWorkflow.create(
         exclude_filter_schema_config
@@ -126,9 +128,9 @@ def test_filter_entities():
 
     # We can include based on the schema name
     include_filter_schema_config = deepcopy(config)
-    include_filter_schema_config["source"]["config"]["schema_filter_pattern"] = {
-        "includes": ["another_db"]
-    }
+    include_filter_schema_config["source"]["sourceConfig"]["config"][
+        "schemaFilterPattern"
+    ] = {"includes": ["another_schema"]}
 
     include_filter_schema_workflow = ProfilerWorkflow.create(
         include_filter_schema_config
@@ -137,18 +139,18 @@ def test_filter_entities():
 
     # We can exclude based on the table name
     exclude_filter_table_config = deepcopy(config)
-    exclude_filter_table_config["source"]["config"]["table_filter_pattern"] = {
-        "excludes": ["tab*"]
-    }
+    exclude_filter_table_config["source"]["sourceConfig"]["config"][
+        "tableFilterPattern"
+    ] = {"excludes": ["tab*"]}
 
     exclude_filter_table_workflow = ProfilerWorkflow.create(exclude_filter_table_config)
     assert len(list(exclude_filter_table_workflow.filter_entities(all_tables))) == 0
 
     # We can include based on the table name
     include_filter_table_config = deepcopy(config)
-    include_filter_table_config["source"]["config"]["table_filter_pattern"] = {
-        "includes": ["table1"]
-    }
+    include_filter_table_config["source"]["sourceConfig"]["config"][
+        "tableFilterPattern"
+    ] = {"includes": ["table1"]}
 
     include_filter_table_workflow = ProfilerWorkflow.create(include_filter_table_config)
     assert len(list(include_filter_table_workflow.filter_entities(all_tables))) == 1
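
Why the counts in the asserts hold: the patterns are ordinary re.match regexes anchored at the start of the name, so "tab*" matches every fixture table while "table1" matches exactly one. A standalone sanity check:

import re

names = ["table1", "table2", "table3"]
assert all(re.match("tab*", n) for n in names)  # excludes drop all 3 -> 0 left
assert [n for n in names if re.match("table1", n)] == ["table1"]  # includes keep 1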