mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-17 14:09:52 +00:00
Add support for external profiler workflow (#13887)
* Add support for external profiler workflow * pylint * resolve comments * resolve comments * pyformat * fix code smell
This commit is contained in:
parent
a8145a82fa
commit
4c19bb5a1d
@ -11,7 +11,7 @@
|
|||||||
"""Azure SQL source module"""
|
"""Azure SQL source module"""
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Iterable
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
from sqlalchemy.dialects.mssql.base import MSDialect, ischema_names
|
from sqlalchemy.dialects.mssql.base import MSDialect, ischema_names
|
||||||
|
|
||||||
@ -24,12 +24,14 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|||||||
)
|
)
|
||||||
from metadata.ingestion.api.steps import InvalidSourceException
|
from metadata.ingestion.api.steps import InvalidSourceException
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
from metadata.ingestion.source.database.azuresql.queries import AZURE_SQL_GET_DATABASES
|
||||||
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
||||||
from metadata.ingestion.source.database.mssql.utils import (
|
from metadata.ingestion.source.database.mssql.utils import (
|
||||||
get_columns,
|
get_columns,
|
||||||
get_table_comment,
|
get_table_comment,
|
||||||
get_view_definition,
|
get_view_definition,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.filters import filter_by_database
|
from metadata.utils.filters import filter_by_database
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
@ -50,7 +52,7 @@ MSDialect.get_all_table_comments = get_all_table_comments
|
|||||||
MSDialect.get_columns = get_columns
|
MSDialect.get_columns = get_columns
|
||||||
|
|
||||||
|
|
||||||
class AzuresqlSource(CommonDbSourceService):
|
class AzuresqlSource(CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Azuresql Source
|
Database metadata from Azuresql Source
|
||||||
@ -66,6 +68,14 @@ class AzuresqlSource(CommonDbSourceService):
|
|||||||
)
|
)
|
||||||
return cls(config, metadata)
|
return cls(config, metadata)
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
if not self.service_connection.ingestAllDatabases:
|
||||||
|
return self.service_connection.database
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(AZURE_SQL_GET_DATABASES)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
|
|
||||||
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
||||||
@ -73,12 +83,7 @@ class AzuresqlSource(CommonDbSourceService):
|
|||||||
self.set_inspector(database_name=configured_db)
|
self.set_inspector(database_name=configured_db)
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(
|
for new_database in self.get_database_names_raw():
|
||||||
"SELECT name FROM master.sys.databases order by name"
|
|
||||||
)
|
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -0,0 +1,15 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
SQL Queries used during ingestion
|
||||||
|
"""
|
||||||
|
|
||||||
|
AZURE_SQL_GET_DATABASES = "SELECT name FROM master.sys.databases order by name"
|
@ -68,6 +68,7 @@ from metadata.ingestion.source.database.common_db_source import (
|
|||||||
CommonDbSourceService,
|
CommonDbSourceService,
|
||||||
TableNameAndType,
|
TableNameAndType,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.ingestion.source.database.stored_procedures_mixin import (
|
from metadata.ingestion.source.database.stored_procedures_mixin import (
|
||||||
QueryByProcedure,
|
QueryByProcedure,
|
||||||
StoredProcedureMixin,
|
StoredProcedureMixin,
|
||||||
@ -190,7 +191,7 @@ BigQueryDialect._build_formatted_table_id = ( # pylint: disable=protected-acces
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BigquerySource(StoredProcedureMixin, CommonDbSourceService):
|
class BigquerySource(StoredProcedureMixin, CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Bigquery Source
|
Database metadata from Bigquery Source
|
||||||
@ -421,6 +422,12 @@ class BigquerySource(StoredProcedureMixin, CommonDbSourceService):
|
|||||||
self.engine = inspector_details.engine
|
self.engine = inspector_details.engine
|
||||||
self.inspector = inspector_details.inspector
|
self.inspector = inspector_details.inspector
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self.project_ids
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
for project_id in self.project_ids:
|
for project_id in self.project_ids:
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
|
@ -13,7 +13,7 @@ Greenplum source module
|
|||||||
"""
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from typing import Iterable, Tuple
|
from typing import Iterable, Optional, Tuple
|
||||||
|
|
||||||
from sqlalchemy import sql
|
from sqlalchemy import sql
|
||||||
from sqlalchemy.dialects.postgresql.base import PGDialect, ischema_names
|
from sqlalchemy.dialects.postgresql.base import PGDialect, ischema_names
|
||||||
@ -51,6 +51,7 @@ from metadata.ingestion.source.database.greenplum.utils import (
|
|||||||
get_table_comment,
|
get_table_comment,
|
||||||
get_view_definition,
|
get_view_definition,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.filters import filter_by_database
|
from metadata.utils.filters import filter_by_database
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
@ -109,7 +110,7 @@ PGDialect.get_all_view_definitions = get_all_view_definitions
|
|||||||
PGDialect.ischema_names = ischema_names
|
PGDialect.ischema_names = ischema_names
|
||||||
|
|
||||||
|
|
||||||
class GreenplumSource(CommonDbSourceService):
|
class GreenplumSource(CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Greenplum Source
|
Database metadata from Greenplum Source
|
||||||
@ -144,16 +145,21 @@ class GreenplumSource(CommonDbSourceService):
|
|||||||
for name, relkind in result
|
for name, relkind in result
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
if not self.service_connection.ingestAllDatabases:
|
||||||
|
return self.service_connection.database
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(GREENPLUM_GET_DB_NAMES)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
||||||
configured_db = self.config.serviceConnection.__root__.config.database
|
configured_db = self.config.serviceConnection.__root__.config.database
|
||||||
self.set_inspector(database_name=configured_db)
|
self.set_inspector(database_name=configured_db)
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(GREENPLUM_GET_DB_NAMES)
|
for new_database in self.get_database_names_raw():
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""MSSQL source module"""
|
"""MSSQL source module"""
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Iterable
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
from sqlalchemy.dialects.mssql.base import MSDialect, ischema_names
|
from sqlalchemy.dialects.mssql.base import MSDialect, ischema_names
|
||||||
|
|
||||||
@ -24,11 +24,13 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|||||||
from metadata.ingestion.api.steps import InvalidSourceException
|
from metadata.ingestion.api.steps import InvalidSourceException
|
||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
||||||
|
from metadata.ingestion.source.database.mssql.queries import MSSQL_GET_DATABASE
|
||||||
from metadata.ingestion.source.database.mssql.utils import (
|
from metadata.ingestion.source.database.mssql.utils import (
|
||||||
get_columns,
|
get_columns,
|
||||||
get_table_comment,
|
get_table_comment,
|
||||||
get_view_definition,
|
get_view_definition,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.utils import fqn
|
from metadata.utils import fqn
|
||||||
from metadata.utils.filters import filter_by_database
|
from metadata.utils.filters import filter_by_database
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
@ -53,7 +55,7 @@ MSDialect.get_all_table_comments = get_all_table_comments
|
|||||||
MSDialect.get_columns = get_columns
|
MSDialect.get_columns = get_columns
|
||||||
|
|
||||||
|
|
||||||
class MssqlSource(CommonDbSourceService):
|
class MssqlSource(CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from MSSQL Source
|
Database metadata from MSSQL Source
|
||||||
@ -70,6 +72,14 @@ class MssqlSource(CommonDbSourceService):
|
|||||||
)
|
)
|
||||||
return cls(config, metadata)
|
return cls(config, metadata)
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
if not self.service_connection.ingestAllDatabases:
|
||||||
|
return self.service_connection.database
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(MSSQL_GET_DATABASE)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
|
|
||||||
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
||||||
@ -77,12 +87,7 @@ class MssqlSource(CommonDbSourceService):
|
|||||||
self.set_inspector(database_name=configured_db)
|
self.set_inspector(database_name=configured_db)
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(
|
for new_database in self.get_database_names_raw():
|
||||||
"SELECT name FROM master.sys.databases order by name"
|
|
||||||
)
|
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Multi DB Source Abstract class
|
||||||
|
"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class MultiDBSource(ABC):
|
||||||
|
@abstractmethod
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Method to return the name of default configured database if available
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
"""
|
||||||
|
Method to return the name of all databases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _execute_database_query(self, query: str) -> Iterable[str]:
|
||||||
|
results = self.connection.execute(query) # pylint: disable=no-member
|
||||||
|
for res in results:
|
||||||
|
row = list(res)
|
||||||
|
yield row[0]
|
@ -13,7 +13,7 @@ Postgres source module
|
|||||||
"""
|
"""
|
||||||
import traceback
|
import traceback
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from typing import Iterable, Tuple
|
from typing import Iterable, Optional, Tuple
|
||||||
|
|
||||||
from sqlalchemy import sql
|
from sqlalchemy import sql
|
||||||
from sqlalchemy.dialects.postgresql.base import PGDialect, ischema_names
|
from sqlalchemy.dialects.postgresql.base import PGDialect, ischema_names
|
||||||
@ -40,6 +40,7 @@ from metadata.ingestion.source.database.common_db_source import (
|
|||||||
CommonDbSourceService,
|
CommonDbSourceService,
|
||||||
TableNameAndType,
|
TableNameAndType,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.ingestion.source.database.postgres.queries import (
|
from metadata.ingestion.source.database.postgres.queries import (
|
||||||
POSTGRES_GET_ALL_TABLE_PG_POLICY,
|
POSTGRES_GET_ALL_TABLE_PG_POLICY,
|
||||||
POSTGRES_GET_DB_NAMES,
|
POSTGRES_GET_DB_NAMES,
|
||||||
@ -111,7 +112,7 @@ PGDialect.get_all_view_definitions = get_all_view_definitions
|
|||||||
PGDialect.ischema_names = ischema_names
|
PGDialect.ischema_names = ischema_names
|
||||||
|
|
||||||
|
|
||||||
class PostgresSource(CommonDbSourceService):
|
class PostgresSource(CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Postgres Source
|
Database metadata from Postgres Source
|
||||||
@ -146,16 +147,21 @@ class PostgresSource(CommonDbSourceService):
|
|||||||
for name, relkind in result
|
for name, relkind in result
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
if not self.service_connection.ingestAllDatabases:
|
||||||
|
return self.service_connection.database
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(POSTGRES_GET_DB_NAMES)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
||||||
configured_db = self.config.serviceConnection.__root__.config.database
|
configured_db = self.config.serviceConnection.__root__.config.database
|
||||||
self.set_inspector(database_name=configured_db)
|
self.set_inspector(database_name=configured_db)
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(POSTGRES_GET_DB_NAMES)
|
for new_database in self.get_database_names_raw():
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -50,6 +50,7 @@ from metadata.ingestion.source.database.common_db_source import (
|
|||||||
CommonDbSourceService,
|
CommonDbSourceService,
|
||||||
TableNameAndType,
|
TableNameAndType,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.ingestion.source.database.redshift.models import RedshiftStoredProcedure
|
from metadata.ingestion.source.database.redshift.models import RedshiftStoredProcedure
|
||||||
from metadata.ingestion.source.database.redshift.queries import (
|
from metadata.ingestion.source.database.redshift.queries import (
|
||||||
REDSHIFT_GET_ALL_RELATION_INFO,
|
REDSHIFT_GET_ALL_RELATION_INFO,
|
||||||
@ -101,7 +102,7 @@ RedshiftDialect._get_all_relation_info = ( # pylint: disable=protected-access
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RedshiftSource(StoredProcedureMixin, CommonDbSourceService):
|
class RedshiftSource(StoredProcedureMixin, CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Redshift Source
|
Database metadata from Redshift Source
|
||||||
@ -153,16 +154,21 @@ class RedshiftSource(StoredProcedureMixin, CommonDbSourceService):
|
|||||||
for name, relkind in result
|
for name, relkind in result
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
if not self.service_connection.ingestAllDatabases:
|
||||||
|
return self.service_connection.database
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(REDSHIFT_GET_DATABASE_NAMES)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
|
||||||
self.inspector = inspect(self.engine)
|
self.inspector = inspect(self.engine)
|
||||||
self.get_partition_details()
|
self.get_partition_details()
|
||||||
yield self.config.serviceConnection.__root__.config.database
|
yield self.config.serviceConnection.__root__.config.database
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(REDSHIFT_GET_DATABASE_NAMES)
|
for new_database in self.get_database_names_raw():
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -52,6 +52,7 @@ from metadata.ingestion.source.database.common_db_source import (
|
|||||||
from metadata.ingestion.source.database.life_cycle_query_mixin import (
|
from metadata.ingestion.source.database.life_cycle_query_mixin import (
|
||||||
LifeCycleQueryMixin,
|
LifeCycleQueryMixin,
|
||||||
)
|
)
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.ingestion.source.database.snowflake.models import (
|
from metadata.ingestion.source.database.snowflake.models import (
|
||||||
STORED_PROC_LANGUAGE_MAP,
|
STORED_PROC_LANGUAGE_MAP,
|
||||||
SnowflakeStoredProcedure,
|
SnowflakeStoredProcedure,
|
||||||
@ -122,7 +123,9 @@ SnowflakeDialect.get_foreign_keys = get_foreign_keys
|
|||||||
SnowflakeDialect.get_columns = get_columns
|
SnowflakeDialect.get_columns = get_columns
|
||||||
|
|
||||||
|
|
||||||
class SnowflakeSource(LifeCycleQueryMixin, StoredProcedureMixin, CommonDbSourceService):
|
class SnowflakeSource(
|
||||||
|
LifeCycleQueryMixin, StoredProcedureMixin, CommonDbSourceService, MultiDBSource
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Snowflake Source
|
Database metadata from Snowflake Source
|
||||||
@ -214,6 +217,15 @@ class SnowflakeSource(LifeCycleQueryMixin, StoredProcedureMixin, CommonDbSourceS
|
|||||||
"""
|
"""
|
||||||
return self.database_desc_map.get(database_name)
|
return self.database_desc_map.get(database_name)
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
return self.service_connection.database
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
results = self.connection.execute(SNOWFLAKE_GET_DATABASES)
|
||||||
|
for res in results:
|
||||||
|
row = list(res)
|
||||||
|
yield row[1]
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
configured_db = self.config.serviceConnection.__root__.config.database
|
configured_db = self.config.serviceConnection.__root__.config.database
|
||||||
if configured_db:
|
if configured_db:
|
||||||
@ -224,10 +236,7 @@ class SnowflakeSource(LifeCycleQueryMixin, StoredProcedureMixin, CommonDbSourceS
|
|||||||
self.set_database_description_map()
|
self.set_database_description_map()
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(SNOWFLAKE_GET_DATABASES)
|
for new_database in self.get_database_names_raw():
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[1]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -32,6 +32,7 @@ from metadata.ingestion.api.steps import InvalidSourceException
|
|||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.database.column_type_parser import create_sqlalchemy_type
|
from metadata.ingestion.source.database.column_type_parser import create_sqlalchemy_type
|
||||||
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
from metadata.ingestion.source.database.common_db_source import CommonDbSourceService
|
||||||
|
from metadata.ingestion.source.database.multi_db_source import MultiDBSource
|
||||||
from metadata.ingestion.source.database.vertica.queries import (
|
from metadata.ingestion.source.database.vertica.queries import (
|
||||||
VERTICA_GET_COLUMNS,
|
VERTICA_GET_COLUMNS,
|
||||||
VERTICA_GET_PRIMARY_KEYS,
|
VERTICA_GET_PRIMARY_KEYS,
|
||||||
@ -262,7 +263,7 @@ VerticaDialect.get_all_table_comments = get_all_table_comments
|
|||||||
VerticaDialect.get_table_comment = get_table_comment
|
VerticaDialect.get_table_comment = get_table_comment
|
||||||
|
|
||||||
|
|
||||||
class VerticaSource(CommonDbSourceService):
|
class VerticaSource(CommonDbSourceService, MultiDBSource):
|
||||||
"""
|
"""
|
||||||
Implements the necessary methods to extract
|
Implements the necessary methods to extract
|
||||||
Database metadata from Vertica Source
|
Database metadata from Vertica Source
|
||||||
@ -293,6 +294,12 @@ class VerticaSource(CommonDbSourceService):
|
|||||||
self.engine, VERTICA_SCHEMA_COMMENTS
|
self.engine, VERTICA_SCHEMA_COMMENTS
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_configured_database(self) -> Optional[str]:
|
||||||
|
return self.service_connection.database
|
||||||
|
|
||||||
|
def get_database_names_raw(self) -> Iterable[str]:
|
||||||
|
yield from self._execute_database_query(VERTICA_LIST_DATABASES)
|
||||||
|
|
||||||
def get_database_names(self) -> Iterable[str]:
|
def get_database_names(self) -> Iterable[str]:
|
||||||
configured_db = self.config.serviceConnection.__root__.config.database
|
configured_db = self.config.serviceConnection.__root__.config.database
|
||||||
if configured_db:
|
if configured_db:
|
||||||
@ -300,10 +307,7 @@ class VerticaSource(CommonDbSourceService):
|
|||||||
self.set_schema_description_map()
|
self.set_schema_description_map()
|
||||||
yield configured_db
|
yield configured_db
|
||||||
else:
|
else:
|
||||||
results = self.connection.execute(VERTICA_LIST_DATABASES)
|
for new_database in self.get_database_names_raw():
|
||||||
for res in results:
|
|
||||||
row = list(res)
|
|
||||||
new_database = row[0]
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
|
@ -64,13 +64,16 @@ class OpenMetadataSource(Source):
|
|||||||
We do this here as well.
|
We do this here as well.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def init_steps(self):
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
# pylint: disable=super-init-not-called
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
config: OpenMetadataWorkflowConfig,
|
config: OpenMetadataWorkflowConfig,
|
||||||
metadata: OpenMetadata,
|
metadata: OpenMetadata,
|
||||||
):
|
):
|
||||||
|
self.init_steps()
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
self.metadata = metadata
|
self.metadata = metadata
|
||||||
|
293
ingestion/src/metadata/profiler/source/metadata_ext.py
Normal file
293
ingestion/src/metadata/profiler/source/metadata_ext.py
Normal file
@ -0,0 +1,293 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
OpenMetadataExt source for the profiler
|
||||||
|
|
||||||
|
This source is used in cases where the service name
|
||||||
|
is not provided for the profiler workflow.
|
||||||
|
In such situations, the profiler will perform a thorough scan
|
||||||
|
of the entire data source to locate the
|
||||||
|
corresponding table entity in OpenMetadata.
|
||||||
|
Subsequently, it will proceed to ingest relevant metrics
|
||||||
|
and sample data for that identified entity.
|
||||||
|
"""
|
||||||
|
import traceback
|
||||||
|
from copy import deepcopy
|
||||||
|
from typing import Iterable, cast
|
||||||
|
|
||||||
|
from sqlalchemy.inspection import inspect
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
|
from metadata.generated.schema.entity.data.table import Table, TableType
|
||||||
|
from metadata.generated.schema.metadataIngestion.databaseServiceMetadataPipeline import (
|
||||||
|
DatabaseServiceMetadataPipeline,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.metadataIngestion.databaseServiceProfilerPipeline import (
|
||||||
|
DatabaseServiceProfilerPipeline,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||||
|
OpenMetadataWorkflowConfig,
|
||||||
|
)
|
||||||
|
from metadata.ingestion.api.models import Either, StackTraceError
|
||||||
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
|
from metadata.ingestion.source.connections import get_connection
|
||||||
|
from metadata.profiler.source.metadata import (
|
||||||
|
OpenMetadataSource,
|
||||||
|
ProfilerSourceAndEntity,
|
||||||
|
)
|
||||||
|
from metadata.profiler.source.profiler_source_factory import profiler_source_factory
|
||||||
|
from metadata.utils import fqn
|
||||||
|
from metadata.utils.class_helper import get_service_type_from_source_type
|
||||||
|
from metadata.utils.filters import filter_by_database, filter_by_schema, filter_by_table
|
||||||
|
from metadata.utils.importer import import_source_class
|
||||||
|
from metadata.utils.logger import profiler_logger
|
||||||
|
|
||||||
|
logger = profiler_logger()
|
||||||
|
|
||||||
|
|
||||||
|
class OpenMetadataSourceExt(OpenMetadataSource):
    """
    This source lists and filters the entities that need
    to be processed by the profiler workflow.

    Unlike the base ``OpenMetadataSource``, this variant does not rely on a
    configured OpenMetadata service name: it introspects the database source
    directly (via a SQLAlchemy engine/inspector) and locates the matching
    OpenMetadata entities through Elasticsearch lookups.

    Note that in order to manage the following steps we need
    to test the connection against the Database Service Source.
    We do this here as well.
    """

    # super().__init__() is intentionally not called: this class performs its
    # own step initialization and connection setup instead of the base flow.
    # pylint: disable=super-init-not-called
    def __init__(
        self,
        config: OpenMetadataWorkflowConfig,
        metadata: OpenMetadata,
    ):
        """
        :param config: full workflow configuration (source + sink + settings)
        :param metadata: OpenMetadata API client used for entity lookups
        """
        self.init_steps()

        self.config = config
        self.metadata = metadata
        # Validate we can reach the database service before doing any work.
        self.test_connection()

        # Init and type the source config
        self.service_connection = self.config.source.serviceConnection.__root__.config
        self.source_config: DatabaseServiceProfilerPipeline = cast(
            DatabaseServiceProfilerPipeline, self.config.source.sourceConfig.config
        )  # Used to satisfy type checked
        source_type = self.config.source.type.lower()
        service_type = get_service_type_from_source_type(self.config.source.type)
        # Dynamically import the metadata source class for this connector so
        # we can reuse its database-listing helpers (get_database_names_raw, ...).
        source_class = import_source_class(
            service_type=service_type, source_type=source_type
        )
        # The imported source expects a *metadata* pipeline config, so swap the
        # profiler sourceConfig for a default metadata one on a deep copy.
        database_source_config = DatabaseServiceMetadataPipeline()
        new_config = deepcopy(self.config.source)
        new_config.sourceConfig.config = database_source_config
        self.source = source_class.create(new_config.dict(), self.metadata)
        # Engine/inspector/connection are (re)built per database in set_inspector.
        self.engine = None
        self.inspector = None
        self._connection = None
        self.set_inspector()

        logger.info(
            f"Starting profiler for service {self.config.source.type}"
            f":{self.config.source.type.lower()}"
        )

    def set_inspector(self, database_name: Optional[str] = None) -> None:
        """
        When sources override `get_database_names`, they will need
        to setup multiple inspectors. They can use this function.
        :param database_name: new database to set
        """
        # Work on a copy so the original connection config is never mutated.
        new_service_connection = deepcopy(self.service_connection)
        if database_name:
            logger.info(f"Ingesting from database: {database_name}")
            new_service_connection.database = database_name
        self.engine = get_connection(new_service_connection)
        self.inspector = inspect(self.engine)
        self._connection = None  # Lazy init as well

    def _iter(self, *_, **__) -> Iterable[Either[ProfilerSourceAndEntity]]:
        """
        Walk databases -> schemas -> tables discovered from the live source,
        resolve each one to its OpenMetadata entity via Elasticsearch, and
        yield a profiler source per table. Errors are yielded as Either.left
        so one failing database does not stop the workflow.
        """
        for database_name in self.get_database_names():
            try:
                # service_name=None: search across all services, since no
                # OpenMetadata service name was configured for this workflow.
                database_entity = fqn.search_database_from_es(
                    database_name=database_name,
                    metadata=self.metadata,
                    service_name=None,
                )
                if not database_entity:
                    logger.debug(
                        f"Database Entity for database `{database_name}` not found"
                    )
                    continue
                for schema_name in self.get_schema_names():
                    for table_name in self.get_table_names(schema_name):
                        table_entity = fqn.search_table_from_es(
                            metadata=self.metadata,
                            database_name=database_name,
                            service_name=None,
                            schema_name=schema_name,
                            table_name=table_name,
                            fields="tableProfilerConfig",
                        )
                        # Tables not yet ingested into OpenMetadata are skipped:
                        # the profiler needs an existing entity to attach to.
                        if not table_entity:
                            logger.debug(
                                f"Table Entity for table `{database_name}.{schema_name}.{table_name}` not found"
                            )
                            continue

                        profiler_source = profiler_source_factory.create(
                            self.config.source.type.lower(),
                            self.config,
                            database_entity,
                            self.metadata,
                        )
                        yield Either(
                            right=ProfilerSourceAndEntity(
                                profiler_source=profiler_source,
                                entity=table_entity,
                            )
                        )
            except Exception as exc:
                yield Either(
                    left=StackTraceError(
                        name=database_name,
                        error=f"Error listing source and entities for database due to [{exc}]",
                        stack_trace=traceback.format_exc(),
                    )
                )

    def get_table_names(self, schema_name: str) -> Iterable[str]:
        """Yield table names in ``schema_name`` that pass the table filter pattern."""
        for table_name in self.inspector.get_table_names(schema_name) or []:
            if filter_by_table(self.source_config.tableFilterPattern, table_name):
                self.status.filter(table_name, "Table pattern not allowed")
                continue
            yield table_name

    def get_schema_names(self) -> Iterable[str]:
        """
        Yield schema names to profile: the single configured ``databaseSchema``
        if the connection defines one, otherwise all inspected schemas that
        pass the schema filter pattern.
        """
        if self.service_connection.__dict__.get("databaseSchema"):
            yield self.service_connection.databaseSchema
        else:
            for schema_name in self.inspector.get_schema_names():
                if filter_by_schema(
                    self.source_config.schemaFilterPattern, schema_name
                ):
                    self.status.filter(schema_name, "Schema pattern not allowed")
                    continue
                yield schema_name

    def get_database_names(self) -> Iterable[str]:
        """
        Method to fetch database names from source
        """
        try:
            # NOTE(review): hasattr checks attribute *presence*, not truthiness —
            # a connection declaring supportsDatabase=False would still take this
            # branch; confirm that is intended.
            if hasattr(self.service_connection, "supportsDatabase"):
                configured_db = self.source.get_configured_database()
                if configured_db:
                    yield configured_db
                else:
                    database_names = self.source.get_database_names_raw()
                    for database in database_names:
                        if filter_by_database(
                            self.source_config.databaseFilterPattern, database
                        ):
                            self.status.filter(database, "Database pattern not allowed")
                            continue
                        # Point the engine/inspector at this database before
                        # yielding so downstream schema/table listing hits it.
                        self.set_inspector(database_name=database)
                        yield database
            else:
                # Single-database connectors: fall back to the configured
                # database / databaseName, defaulting to "default".
                custom_database_name = self.service_connection.__dict__.get(
                    "databaseName"
                )
                database_name = self.service_connection.__dict__.get(
                    "database", custom_database_name or "default"
                )
                yield database_name
        except Exception as exc:
            # Best-effort listing: log and yield nothing further on failure.
            logger.debug(f"Failed to fetch database names {exc}")
            logger.debug(traceback.format_exc())

    def filter_entities(self, tables: Iterable[Table]) -> Iterable[Table]:
        """
        From a list of tables, apply the SQLSourceConfig
        filter patterns.

        We will update the status on the SQLSource Status.
        """
        for table in tables:
            try:
                if filter_by_schema(
                    self.source_config.schemaFilterPattern,
                    table.databaseSchema.name,  # type: ignore
                ):
                    self.status.filter(
                        f"Schema pattern not allowed: {table.fullyQualifiedName.__root__}",
                        "Schema pattern not allowed",
                    )
                    continue
                if filter_by_table(
                    self.source_config.tableFilterPattern,
                    table.name.__root__,
                ):
                    self.status.filter(
                        f"Table pattern not allowed: {table.fullyQualifiedName.__root__}",
                        "Table pattern not allowed",
                    )
                    continue
                # Views are only profiled when explicitly enabled in config.
                if (
                    table.tableType == TableType.View
                    and not self.source_config.includeViews
                ):
                    self.status.filter(
                        table.fullyQualifiedName.__root__,
                        "View filtered out",
                    )
                    continue
                yield table
            except Exception as exc:
                # Record the failure for this table and keep filtering the rest.
                self.status.failed(
                    StackTraceError(
                        name=table.fullyQualifiedName.__root__,
                        error=f"Unexpected error filtering entities for table [{table}]: {exc}",
                        stack_trace=traceback.format_exc(),
                    )
                )

    def get_table_entities(self, database):
        """
        List and filter OpenMetadata tables based on the
        source configuration.

        The listing will be based on the entities from the
        informed service name in the source configuration.

        Note that users can specify `table_filter_pattern` to
        either be `includes` or `excludes`. This means
        that we will either what is specified in `includes`
        or we will use everything but the tables excluded.

        Same with `schema_filter_pattern`.

        :param database: Database entity whose tables should be listed
                         (presumably an OpenMetadata ``Database`` — it is
                         read via ``database.name.__root__``; confirm).
        """
        tables = self.metadata.list_all_entities(
            entity=Table,
            fields=[
                "tableProfilerConfig",
            ],
            params={
                "service": self.config.source.serviceName,
                "database": fqn.build(
                    self.metadata,
                    entity_type=Database,
                    service_name=self.config.source.serviceName,
                    database_name=database.name.__root__,
                ),
            },  # type: ignore
        )

        yield from self.filter_entities(tables)
|
@ -531,12 +531,12 @@ def build_es_fqn_search_string(
|
|||||||
Returns:
|
Returns:
|
||||||
FQN search string
|
FQN search string
|
||||||
"""
|
"""
|
||||||
if not service_name or not table_name:
|
if not table_name:
|
||||||
raise FQNBuildingException(
|
raise FQNBuildingException(
|
||||||
f"Service Name and Table Name should be informed, but got service=`{service_name}`, table=`{table_name}`"
|
f"Table Name should be informed, but got table=`{table_name}`"
|
||||||
)
|
)
|
||||||
fqn_search_string = _build(
|
fqn_search_string = _build(
|
||||||
service_name, database_name or "*", schema_name or "*", table_name
|
service_name or "*", database_name or "*", schema_name or "*", table_name
|
||||||
)
|
)
|
||||||
return fqn_search_string
|
return fqn_search_string
|
||||||
|
|
||||||
@ -548,6 +548,7 @@ def search_table_from_es(
|
|||||||
service_name: str,
|
service_name: str,
|
||||||
table_name: str,
|
table_name: str,
|
||||||
fetch_multiple_entities: bool = False,
|
fetch_multiple_entities: bool = False,
|
||||||
|
fields: Optional[str] = None,
|
||||||
):
|
):
|
||||||
fqn_search_string = build_es_fqn_search_string(
|
fqn_search_string = build_es_fqn_search_string(
|
||||||
database_name, schema_name, service_name, table_name
|
database_name, schema_name, service_name, table_name
|
||||||
@ -556,6 +557,36 @@ def search_table_from_es(
|
|||||||
es_result = metadata.es_search_from_fqn(
|
es_result = metadata.es_search_from_fqn(
|
||||||
entity_type=Table,
|
entity_type=Table,
|
||||||
fqn_search_string=fqn_search_string,
|
fqn_search_string=fqn_search_string,
|
||||||
|
fields=fields,
|
||||||
|
)
|
||||||
|
|
||||||
|
return get_entity_from_es_result(
|
||||||
|
entity_list=es_result, fetch_multiple_entities=fetch_multiple_entities
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def search_database_from_es(
|
||||||
|
metadata: OpenMetadata,
|
||||||
|
database_name: str,
|
||||||
|
service_name: Optional[str],
|
||||||
|
fetch_multiple_entities: Optional[bool] = False,
|
||||||
|
fields: Optional[str] = None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Search Database entity from ES
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not database_name:
|
||||||
|
raise FQNBuildingException(
|
||||||
|
f"Database Name should be informed, but got database=`{database_name}`"
|
||||||
|
)
|
||||||
|
|
||||||
|
fqn_search_string = _build(service_name or "*", database_name)
|
||||||
|
|
||||||
|
es_result = metadata.es_search_from_fqn(
|
||||||
|
entity_type=Database,
|
||||||
|
fqn_search_string=fqn_search_string,
|
||||||
|
fields=fields,
|
||||||
)
|
)
|
||||||
|
|
||||||
return get_entity_from_es_result(
|
return get_entity_from_es_result(
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
Workflow definition for metadata related ingestions: metadata and lineage.
|
Workflow definition for metadata related ingestions: metadata and lineage.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from metadata.config.common import WorkflowExecutionError
|
||||||
from metadata.ingestion.api.steps import Sink, Source
|
from metadata.ingestion.api.steps import Sink, Source
|
||||||
from metadata.utils.importer import (
|
from metadata.utils.importer import (
|
||||||
import_from_module,
|
import_from_module,
|
||||||
@ -40,6 +41,12 @@ class MetadataWorkflow(BaseWorkflow):
|
|||||||
def _get_source(self) -> Source:
|
def _get_source(self) -> Source:
|
||||||
# Source that we are ingesting, e.g., mysql, looker or kafka
|
# Source that we are ingesting, e.g., mysql, looker or kafka
|
||||||
source_type = self.config.source.type.lower()
|
source_type = self.config.source.type.lower()
|
||||||
|
if not self.config.source.serviceName:
|
||||||
|
raise WorkflowExecutionError(
|
||||||
|
"serviceName is required field for executing the Metadata Workflow. "
|
||||||
|
"You can find more information on how to build the YAML "
|
||||||
|
"configuration here: https://docs.open-metadata.org/connectors"
|
||||||
|
)
|
||||||
|
|
||||||
source_class = (
|
source_class = (
|
||||||
import_from_module(
|
import_from_module(
|
||||||
|
@ -19,6 +19,7 @@ from metadata.ingestion.source.connections import get_connection, get_test_conne
|
|||||||
from metadata.pii.processor import PIIProcessor
|
from metadata.pii.processor import PIIProcessor
|
||||||
from metadata.profiler.processor.processor import ProfilerProcessor
|
from metadata.profiler.processor.processor import ProfilerProcessor
|
||||||
from metadata.profiler.source.metadata import OpenMetadataSource
|
from metadata.profiler.source.metadata import OpenMetadataSource
|
||||||
|
from metadata.profiler.source.metadata_ext import OpenMetadataSourceExt
|
||||||
from metadata.utils.importer import import_sink_class
|
from metadata.utils.importer import import_sink_class
|
||||||
from metadata.utils.logger import profiler_logger
|
from metadata.utils.logger import profiler_logger
|
||||||
from metadata.workflow.base import BaseWorkflow
|
from metadata.workflow.base import BaseWorkflow
|
||||||
@ -40,8 +41,19 @@ class ProfilerWorkflow(BaseWorkflow):
|
|||||||
# Validate that we can properly reach the source database
|
# Validate that we can properly reach the source database
|
||||||
self.test_connection()
|
self.test_connection()
|
||||||
|
|
||||||
|
def _get_source_class(self):
|
||||||
|
if self.config.source.serviceName:
|
||||||
|
return OpenMetadataSource
|
||||||
|
logger.info(
|
||||||
|
"Database Service name not provided, we will scan all the tables"
|
||||||
|
"available within data source and locate table entity in OpenMetadata"
|
||||||
|
"to ingest profiler data."
|
||||||
|
)
|
||||||
|
return OpenMetadataSourceExt
|
||||||
|
|
||||||
def set_steps(self):
|
def set_steps(self):
|
||||||
self.source = OpenMetadataSource.create(self.config.dict(), self.metadata)
|
source_class = self._get_source_class()
|
||||||
|
self.source = source_class.create(self.config.dict(), self.metadata)
|
||||||
|
|
||||||
profiler_processor = self._get_profiler_processor()
|
profiler_processor = self._get_profiler_processor()
|
||||||
pii_processor = self._get_pii_processor()
|
pii_processor = self._get_pii_processor()
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
Usage Workflow Definition
|
Usage Workflow Definition
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from metadata.config.common import WorkflowExecutionError
|
||||||
from metadata.ingestion.api.steps import BulkSink, Processor, Source, Stage
|
from metadata.ingestion.api.steps import BulkSink, Processor, Source, Stage
|
||||||
from metadata.utils.importer import (
|
from metadata.utils.importer import (
|
||||||
import_bulk_sink_type,
|
import_bulk_sink_type,
|
||||||
@ -44,6 +45,12 @@ class UsageWorkflow(BaseWorkflow):
|
|||||||
def _get_source(self) -> Source:
|
def _get_source(self) -> Source:
|
||||||
# Source that we are ingesting, e.g., mysql, looker or kafka
|
# Source that we are ingesting, e.g., mysql, looker or kafka
|
||||||
source_type = self.config.source.type.lower()
|
source_type = self.config.source.type.lower()
|
||||||
|
if not self.config.source.serviceName:
|
||||||
|
raise WorkflowExecutionError(
|
||||||
|
"ServiceName is required field for executing the Usage Workflow. "
|
||||||
|
"You can find more information on how to build the YAML "
|
||||||
|
"configuration here: https://docs.open-metadata.org/connectors"
|
||||||
|
)
|
||||||
|
|
||||||
source_class = (
|
source_class = (
|
||||||
import_from_module(
|
import_from_module(
|
||||||
|
@ -83,7 +83,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": ["type", "serviceName", "sourceConfig"]
|
"required": ["type", "sourceConfig"]
|
||||||
},
|
},
|
||||||
"processor": {
|
"processor": {
|
||||||
"description": "Configuration for Processor Component in the OpenMetadata Ingestion Framework.",
|
"description": "Configuration for Processor Component in the OpenMetadata Ingestion Framework.",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user