mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-26 17:34:41 +00:00
Metadata Source Lint (#8018)
This commit is contained in:
parent
b8e989af6c
commit
13f6f79f30
@ -9,12 +9,15 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Amundsen source to extract metadata
|
||||||
|
"""
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Iterable, List, Optional
|
from typing import Iterable, List, Optional
|
||||||
|
|
||||||
from pydantic import SecretStr
|
from pydantic import SecretStr
|
||||||
from sqlalchemy.engine.url import make_url
|
from sqlalchemy.engine.url import make_url
|
||||||
from tomlkit import table
|
|
||||||
|
|
||||||
from metadata.clients.neo4j_client import Neo4JConfig, Neo4jHelper
|
from metadata.clients.neo4j_client import Neo4JConfig, Neo4jHelper
|
||||||
from metadata.config.common import ConfigModel
|
from metadata.config.common import ConfigModel
|
||||||
@ -100,25 +103,31 @@ SUPERSET_DEFAULT_CONFIG = {
|
|||||||
|
|
||||||
|
|
||||||
class AmundsenStatus(SourceStatus):
|
class AmundsenStatus(SourceStatus):
|
||||||
success: List[str] = list()
|
success: List[str] = []
|
||||||
failures: List[str] = list()
|
failures: List[str] = []
|
||||||
warnings: List[str] = list()
|
warnings: List[str] = []
|
||||||
filtered: List[str] = list()
|
filtered: List[str] = []
|
||||||
|
|
||||||
def scanned(self, entity_name: str) -> None:
|
def scanned(self, record: str) -> None:
|
||||||
self.success.append(entity_name)
|
self.success.append(record)
|
||||||
logger.info("Entity Scanned: {}".format(entity_name))
|
logger.info(f"Entity Scanned: {record}")
|
||||||
|
|
||||||
def failure(self, key: str, reason: str) -> None:
|
def failure(self, key: str, reason: str) -> None:
|
||||||
self.failures.append({key: reason})
|
self.failures.append({key: reason})
|
||||||
|
|
||||||
|
|
||||||
class AmundsenSource(Source[Entity]):
|
class AmundsenSource(Source[Entity]):
|
||||||
|
"""
|
||||||
|
Amundsen source class
|
||||||
|
"""
|
||||||
|
|
||||||
dashboard_service: DashboardService
|
dashboard_service: DashboardService
|
||||||
|
|
||||||
def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
|
def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.metadata_config = metadata_config
|
self.metadata_config = metadata_config
|
||||||
|
self.database_schema_object = None
|
||||||
|
self.database_object = None
|
||||||
self.metadata = OpenMetadata(self.metadata_config)
|
self.metadata = OpenMetadata(self.metadata_config)
|
||||||
self.service_connection = self.config.serviceConnection.__root__.config
|
self.service_connection = self.config.serviceConnection.__root__.config
|
||||||
neo4j_config = Neo4JConfig(
|
neo4j_config = Neo4JConfig(
|
||||||
@ -207,13 +216,14 @@ class AmundsenSource(Source[Entity]):
|
|||||||
entity=DatabaseService, fqn=service_url.get_backend_name()
|
entity=DatabaseService, fqn=service_url.get_backend_name()
|
||||||
)
|
)
|
||||||
if service_entity:
|
if service_entity:
|
||||||
table_fqn = "{service}.{database_schema}.{table}".format(
|
service = service_url.get_backend_name()
|
||||||
service=service_url.get_backend_name(),
|
database_schema = (
|
||||||
database_schema=service_url.host
|
service_url.host
|
||||||
if hasattr(service_entity.connection.config, "supportsDatabase")
|
if hasattr(service_entity.connection.config, "supportsDatabase")
|
||||||
else f"default.{service_url.host.split('.')[-1]}",
|
else f"default.{service_url.host.split('.')[-1]}"
|
||||||
table=service_url.database,
|
|
||||||
)
|
)
|
||||||
|
table = service_url.database
|
||||||
|
table_fqn = f"{service}.{database_schema}.{table}"
|
||||||
table_entity: Table = self.metadata.get_by_name(
|
table_entity: Table = self.metadata.get_by_name(
|
||||||
entity=Table, fqn=table_fqn
|
entity=Table, fqn=table_fqn
|
||||||
)
|
)
|
||||||
@ -245,11 +255,9 @@ class AmundsenSource(Source[Entity]):
|
|||||||
yield tag_category
|
yield tag_category
|
||||||
logger.info(f"Tag Category {tag_category}, Primary Tag {tag} Ingested")
|
logger.info(f"Tag Category {tag_category}, Primary Tag {tag} Ingested")
|
||||||
|
|
||||||
def create_table_entity(self, table):
|
def _yield_create_database(self, table):
|
||||||
try:
|
try:
|
||||||
service_name = table["database"]
|
service_entity = self.get_database_service(table["database"])
|
||||||
# TODO: use metadata.get_service_or_create
|
|
||||||
service_entity = self.get_database_service(service_name)
|
|
||||||
|
|
||||||
database_request = CreateDatabaseRequest(
|
database_request = CreateDatabaseRequest(
|
||||||
name=table["cluster"]
|
name=table["cluster"]
|
||||||
@ -263,32 +271,47 @@ class AmundsenSource(Source[Entity]):
|
|||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
service_name=service_name,
|
service_name=table["database"],
|
||||||
database_name=database_request.name.__root__,
|
database_name=table["cluster"],
|
||||||
)
|
)
|
||||||
|
|
||||||
database_object = self.metadata.get_by_name(
|
self.database_object = self.metadata.get_by_name(
|
||||||
entity=Database, fqn=database_fqn
|
entity=Database, fqn=database_fqn
|
||||||
)
|
)
|
||||||
|
except Exception as err:
|
||||||
|
logger.error(f"Failed to Ingest database due to - {err}")
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
|
||||||
|
def _yield_create_database_schema(self, table):
|
||||||
|
try:
|
||||||
database_schema_request = CreateDatabaseSchemaRequest(
|
database_schema_request = CreateDatabaseSchemaRequest(
|
||||||
name=table["schema"],
|
name=table["schema"],
|
||||||
database=EntityReference(id=database_object.id, type="database"),
|
database=EntityReference(id=self.database_object.id, type="database"),
|
||||||
)
|
)
|
||||||
yield database_schema_request
|
yield database_schema_request
|
||||||
|
|
||||||
database_schema_fqn = fqn.build(
|
database_schema_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=DatabaseSchema,
|
entity_type=DatabaseSchema,
|
||||||
service_name=service_name,
|
service_name=table["database"],
|
||||||
database_name=database_request.name.__root__,
|
database_name=table["cluster"],
|
||||||
schema_name=database_schema_request.name.__root__,
|
schema_name=database_schema_request.name.__root__,
|
||||||
)
|
)
|
||||||
|
|
||||||
database_schema_object = self.metadata.get_by_name(
|
self.database_schema_object = self.metadata.get_by_name(
|
||||||
entity=DatabaseSchema, fqn=database_schema_fqn
|
entity=DatabaseSchema, fqn=database_schema_fqn
|
||||||
)
|
)
|
||||||
|
except Exception as err:
|
||||||
|
logger.error(f"Failed to Ingest database due to - {err}")
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
|
||||||
|
def create_table_entity(self, table):
|
||||||
|
"""
|
||||||
|
Process table details and return CreateTableRequest
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
yield from self._yield_create_database(table)
|
||||||
|
yield from self._yield_create_database_schema(table)
|
||||||
columns: List[Column] = []
|
columns: List[Column] = []
|
||||||
if len(table["column_names"]) == len(table["column_descriptions"]):
|
if len(table["column_names"]) == len(table["column_descriptions"]):
|
||||||
# zipping on column_descriptions can cause incorrect or no ingestion
|
# zipping on column_descriptions can cause incorrect or no ingestion
|
||||||
@ -308,7 +331,9 @@ class AmundsenSource(Source[Entity]):
|
|||||||
# Amundsen merges the length into type itself. Instead of making changes to our generic type builder
|
# Amundsen merges the length into type itself. Instead of making changes to our generic type builder
|
||||||
# we will do a type match and see if it matches any primitive types and return a type
|
# we will do a type match and see if it matches any primitive types and return a type
|
||||||
data_type = self.get_type_primitive_type(data_type)
|
data_type = self.get_type_primitive_type(data_type)
|
||||||
parsed_string = ColumnTypeParser._parse_datatype_string(data_type)
|
parsed_string = ColumnTypeParser._parse_datatype_string( # pylint: disable=protected-access
|
||||||
|
data_type
|
||||||
|
) # pylint: disable=protected-access
|
||||||
parsed_string["name"] = name
|
parsed_string["name"] = name
|
||||||
parsed_string["dataLength"] = 1
|
parsed_string["dataLength"] = 1
|
||||||
parsed_string["description"] = description
|
parsed_string["description"] = description
|
||||||
@ -383,7 +408,7 @@ class AmundsenSource(Source[Entity]):
|
|||||||
tableType="Regular",
|
tableType="Regular",
|
||||||
description=table["description"],
|
description=table["description"],
|
||||||
databaseSchema=EntityReference(
|
databaseSchema=EntityReference(
|
||||||
id=database_schema_object.id, type="databaseSchema"
|
id=self.database_schema_object.id, type="databaseSchema"
|
||||||
),
|
),
|
||||||
tags=tags,
|
tags=tags,
|
||||||
columns=columns,
|
columns=columns,
|
||||||
@ -396,7 +421,6 @@ class AmundsenSource(Source[Entity]):
|
|||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.warning(f"Failed to create table entity [{table}]: {exc}")
|
logger.warning(f"Failed to create table entity [{table}]: {exc}")
|
||||||
self.status.failure(table["name"], str(exc))
|
self.status.failure(table["name"], str(exc))
|
||||||
return None
|
|
||||||
|
|
||||||
def create_dashboard_service(self, dashboard: dict):
|
def create_dashboard_service(self, dashboard: dict):
|
||||||
service_name = dashboard["cluster"]
|
service_name = dashboard["cluster"]
|
||||||
@ -412,6 +436,9 @@ class AmundsenSource(Source[Entity]):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def create_dashboard_entity(self, dashboard):
|
def create_dashboard_entity(self, dashboard):
|
||||||
|
"""
|
||||||
|
Method to process dashboard and return CreateDashboardRequest
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
self.status.scanned(dashboard["name"])
|
self.status.scanned(dashboard["name"])
|
||||||
yield CreateDashboardRequest(
|
yield CreateDashboardRequest(
|
||||||
@ -432,7 +459,6 @@ class AmundsenSource(Source[Entity]):
|
|||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.warning(f"Failed to create dashboard entity [{dashboard}]: {exc}")
|
logger.warning(f"Failed to create dashboard entity [{dashboard}]: {exc}")
|
||||||
self.status.failure(dashboard["name"], str(exc))
|
self.status.failure(dashboard["name"], str(exc))
|
||||||
return None
|
|
||||||
|
|
||||||
def create_chart_entity(self, dashboard):
|
def create_chart_entity(self, dashboard):
|
||||||
for (name, chart_id, chart_type, url) in zip(
|
for (name, chart_id, chart_type, url) in zip(
|
||||||
@ -471,8 +497,8 @@ class AmundsenSource(Source[Entity]):
|
|||||||
service = self.metadata.get_by_name(entity=DatabaseService, fqn=service_name)
|
service = self.metadata.get_by_name(entity=DatabaseService, fqn=service_name)
|
||||||
if service is not None:
|
if service is not None:
|
||||||
return service
|
return service
|
||||||
else:
|
|
||||||
logger.error(f"Please create a service with name {service_name}")
|
logger.error(f"Please create a service with name {service_name}")
|
||||||
|
return None
|
||||||
|
|
||||||
def test_connection(self) -> None:
|
def test_connection(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
@ -1,3 +1,18 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Atlas source to extract metadata
|
||||||
|
"""
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -40,8 +55,8 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
|
|
||||||
class AtlasSourceStatus(SourceStatus):
|
class AtlasSourceStatus(SourceStatus):
|
||||||
tables_scanned: List[str] = list()
|
tables_scanned: List[str] = []
|
||||||
filtered: List[str] = list()
|
filtered: List[str] = []
|
||||||
|
|
||||||
def table_scanned(self, table: str) -> None:
|
def table_scanned(self, table: str) -> None:
|
||||||
self.tables_scanned.append(table)
|
self.tables_scanned.append(table)
|
||||||
@ -52,6 +67,10 @@ class AtlasSourceStatus(SourceStatus):
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class AtlasSource(Source):
|
class AtlasSource(Source):
|
||||||
|
"""
|
||||||
|
Atlas source class
|
||||||
|
"""
|
||||||
|
|
||||||
config: WorkflowSource
|
config: WorkflowSource
|
||||||
atlas_client: AtlasClient
|
atlas_client: AtlasClient
|
||||||
status: AtlasSourceStatus
|
status: AtlasSourceStatus
|
||||||
@ -63,7 +82,6 @@ class AtlasSource(Source):
|
|||||||
config: WorkflowSource,
|
config: WorkflowSource,
|
||||||
metadata_config: OpenMetadataConnection,
|
metadata_config: OpenMetadataConnection,
|
||||||
):
|
):
|
||||||
super().__init__()
|
|
||||||
self.config = config
|
self.config = config
|
||||||
self.metadata_config = metadata_config
|
self.metadata_config = metadata_config
|
||||||
self.metadata = OpenMetadata(metadata_config)
|
self.metadata = OpenMetadata(metadata_config)
|
||||||
@ -78,11 +96,18 @@ class AtlasSource(Source):
|
|||||||
if not path.is_file():
|
if not path.is_file():
|
||||||
logger.error(f"File not found {self.service_connection.entityTypes}")
|
logger.error(f"File not found {self.service_connection.entityTypes}")
|
||||||
raise FileNotFoundError()
|
raise FileNotFoundError()
|
||||||
with open(self.service_connection.entityTypes, "r") as f:
|
with open(
|
||||||
self.service_connection.entityTypes = yaml.load(f, Loader=yaml.SafeLoader)
|
self.service_connection.entityTypes, "r", encoding="utf-8"
|
||||||
|
) as entity_types_file:
|
||||||
|
self.service_connection.entityTypes = yaml.load(
|
||||||
|
entity_types_file, Loader=yaml.SafeLoader
|
||||||
|
)
|
||||||
self.tables: Dict[str, Any] = {}
|
self.tables: Dict[str, Any] = {}
|
||||||
self.topics: Dict[str, Any] = {}
|
self.topics: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
self.service = None
|
||||||
|
self.message_service = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
|
|
||||||
@ -98,7 +123,6 @@ class AtlasSource(Source):
|
|||||||
"""
|
"""
|
||||||
Not required to implement
|
Not required to implement
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
|
||||||
def next_record(self):
|
def next_record(self):
|
||||||
for key in self.service_connection.entityTypes["Table"].keys():
|
for key in self.service_connection.entityTypes["Table"].keys():
|
||||||
@ -121,13 +145,15 @@ class AtlasSource(Source):
|
|||||||
yield from self._parse_topic_entity(topic)
|
yield from self._parse_topic_entity(topic)
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
return super().close()
|
"""
|
||||||
|
Not required to implement
|
||||||
|
"""
|
||||||
|
|
||||||
def get_status(self) -> SourceStatus:
|
def get_status(self) -> SourceStatus:
|
||||||
return self.status
|
return self.status
|
||||||
|
|
||||||
def _parse_topic_entity(self, name):
|
def _parse_topic_entity(self, name):
|
||||||
for key in self.topics.keys():
|
for key in self.topics:
|
||||||
topic_entity = self.atlas_client.get_entity(self.topics[key])
|
topic_entity = self.atlas_client.get_entity(self.topics[key])
|
||||||
tpc_entities = topic_entity["entities"]
|
tpc_entities = topic_entity["entities"]
|
||||||
for tpc_entity in tpc_entities:
|
for tpc_entity in tpc_entities:
|
||||||
@ -164,55 +190,45 @@ class AtlasSource(Source):
|
|||||||
db_entity = tbl_entity["relationshipAttributes"][
|
db_entity = tbl_entity["relationshipAttributes"][
|
||||||
self.service_connection.entityTypes["Table"][name]["db"]
|
self.service_connection.entityTypes["Table"][name]["db"]
|
||||||
]
|
]
|
||||||
database_request = self.get_database_entity(
|
yield self.get_database_entity(db_entity["displayText"])
|
||||||
db_entity["displayText"]
|
|
||||||
)
|
|
||||||
table_name = tbl_attrs["name"]
|
|
||||||
tbl_description = tbl_attrs["description"]
|
|
||||||
yield database_request
|
|
||||||
|
|
||||||
database_fqn = fqn.build(
|
database_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=Database,
|
entity_type=Database,
|
||||||
service_name=self.service_connection.dbService,
|
service_name=self.service_connection.dbService,
|
||||||
database_name=database_request.name.__root__,
|
database_name=db_entity["displayText"],
|
||||||
)
|
)
|
||||||
|
|
||||||
database_object = self.metadata.get_by_name(
|
database_object = self.metadata.get_by_name(
|
||||||
entity=Database, fqn=database_fqn
|
entity=Database, fqn=database_fqn
|
||||||
)
|
)
|
||||||
|
|
||||||
tbl_attrs = tbl_entity["attributes"]
|
yield CreateDatabaseSchemaRequest(
|
||||||
db_entity = tbl_entity["relationshipAttributes"]["db"]
|
|
||||||
|
|
||||||
database_schema_request = CreateDatabaseSchemaRequest(
|
|
||||||
name=db_entity["displayText"],
|
name=db_entity["displayText"],
|
||||||
database=EntityReference(
|
database=EntityReference(
|
||||||
id=database_object.id, type="database"
|
id=database_object.id, type="database"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
yield database_schema_request
|
|
||||||
|
|
||||||
database_schema_fqn = fqn.build(
|
database_schema_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=DatabaseSchema,
|
entity_type=DatabaseSchema,
|
||||||
service_name=self.config.serviceConnection.__root__.config.dbService,
|
service_name=self.config.serviceConnection.__root__.config.dbService,
|
||||||
database_name=database_request.name.__root__,
|
database_name=db_entity["displayText"],
|
||||||
schema_name=database_schema_request.name.__root__,
|
schema_name=db_entity["displayText"],
|
||||||
)
|
)
|
||||||
database_schema_object = self.metadata.get_by_name(
|
database_schema_object = self.metadata.get_by_name(
|
||||||
entity=DatabaseSchema, fqn=database_schema_fqn
|
entity=DatabaseSchema, fqn=database_schema_fqn
|
||||||
)
|
)
|
||||||
|
|
||||||
table_request = CreateTableRequest(
|
yield CreateTableRequest(
|
||||||
name=table_name,
|
name=tbl_attrs["name"],
|
||||||
databaseSchema=EntityReference(
|
databaseSchema=EntityReference(
|
||||||
id=database_schema_object.id, type="databaseSchema"
|
id=database_schema_object.id, type="databaseSchema"
|
||||||
),
|
),
|
||||||
description=tbl_description,
|
description=tbl_attrs["description"],
|
||||||
columns=tbl_columns,
|
columns=tbl_columns,
|
||||||
)
|
)
|
||||||
yield table_request
|
|
||||||
|
|
||||||
yield from self.ingest_lineage(tbl_entity["guid"], name)
|
yield from self.ingest_lineage(tbl_entity["guid"], name)
|
||||||
|
|
||||||
@ -239,7 +255,7 @@ class AtlasSource(Source):
|
|||||||
dataType=ColumnTypeParser.get_column_type(
|
dataType=ColumnTypeParser.get_column_type(
|
||||||
column["dataType"].upper()
|
column["dataType"].upper()
|
||||||
),
|
),
|
||||||
dataTypeDisplay="{}({})".format(column["dataType"], "1"),
|
dataTypeDisplay=column["dataType"],
|
||||||
dataLength=col_data_length,
|
dataLength=col_data_length,
|
||||||
ordinalPosition=ordinal_pos,
|
ordinalPosition=ordinal_pos,
|
||||||
)
|
)
|
||||||
@ -257,6 +273,9 @@ class AtlasSource(Source):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def ingest_lineage(self, source_guid, name) -> Iterable[AddLineageRequest]:
|
def ingest_lineage(self, source_guid, name) -> Iterable[AddLineageRequest]:
|
||||||
|
"""
|
||||||
|
Fetch and ingest lineage
|
||||||
|
"""
|
||||||
lineage_response = self.atlas_client.get_lineage(source_guid)
|
lineage_response = self.atlas_client.get_lineage(source_guid)
|
||||||
lineage_relations = lineage_response["relations"]
|
lineage_relations = lineage_response["relations"]
|
||||||
tbl_entity = self.atlas_client.get_entity(lineage_response["baseEntityGuid"])
|
tbl_entity = self.atlas_client.get_entity(lineage_response["baseEntityGuid"])
|
||||||
@ -321,18 +340,19 @@ class AtlasSource(Source):
|
|||||||
)
|
)
|
||||||
yield lineage
|
yield lineage
|
||||||
|
|
||||||
def get_lineage_entity_ref(self, fqn, metadata_config, type) -> EntityReference:
|
def get_lineage_entity_ref(
|
||||||
|
self, to_fqn, metadata_config, entity_type
|
||||||
|
) -> EntityReference:
|
||||||
metadata = OpenMetadata(metadata_config)
|
metadata = OpenMetadata(metadata_config)
|
||||||
if type == "table":
|
if entity_type == "table":
|
||||||
table = metadata.get_by_name(entity=Table, fqn=fqn)
|
table = metadata.get_by_name(entity=Table, fqn=to_fqn)
|
||||||
if not table:
|
if table:
|
||||||
return
|
|
||||||
return EntityReference(id=table.id.__root__, type="table")
|
return EntityReference(id=table.id.__root__, type="table")
|
||||||
elif type == "pipeline":
|
if entity_type == "pipeline":
|
||||||
pipeline = metadata.get_by_name(entity=Pipeline, fqn=fqn)
|
pipeline = metadata.get_by_name(entity=Pipeline, fqn=to_fqn)
|
||||||
if not pipeline:
|
if pipeline:
|
||||||
return
|
|
||||||
return EntityReference(id=pipeline.id.__root__, type="pipeline")
|
return EntityReference(id=pipeline.id.__root__, type="pipeline")
|
||||||
|
return None
|
||||||
|
|
||||||
def test_connection(self) -> None:
|
def test_connection(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
@ -42,9 +42,9 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
class MetadataSourceStatus(SourceStatus):
|
class MetadataSourceStatus(SourceStatus):
|
||||||
|
|
||||||
success: List[str] = list()
|
success: List[str] = []
|
||||||
failures: List[str] = list()
|
failures: List[str] = []
|
||||||
warnings: List[str] = list()
|
warnings: List[str] = []
|
||||||
|
|
||||||
def scanned_entity(self, entity_class_name: str, entity_name: str) -> None:
|
def scanned_entity(self, entity_class_name: str, entity_name: str) -> None:
|
||||||
self.success.append(entity_name)
|
self.success.append(entity_name)
|
||||||
@ -59,6 +59,9 @@ class MetadataSourceStatus(SourceStatus):
|
|||||||
|
|
||||||
|
|
||||||
class MetadataSource(Source[Entity]):
|
class MetadataSource(Source[Entity]):
|
||||||
|
"""
|
||||||
|
Metadata Source to Fetch All Entities from backend
|
||||||
|
"""
|
||||||
|
|
||||||
config: WorkflowSource
|
config: WorkflowSource
|
||||||
report: SourceStatus
|
report: SourceStatus
|
||||||
@ -85,7 +88,7 @@ class MetadataSource(Source[Entity]):
|
|||||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
raise NotImplementedError("Create Method not implemented")
|
raise NotImplementedError("Create Method not implemented")
|
||||||
|
|
||||||
def next_record(self) -> Iterable[Entity]:
|
def next_record(self) -> Iterable[Entity]: # pylint: disable=too-many-branches
|
||||||
if self.service_connection.includeTables:
|
if self.service_connection.includeTables:
|
||||||
yield from self.fetch_entities(
|
yield from self.fetch_entities(
|
||||||
entity_class=Table,
|
entity_class=Table,
|
||||||
|
@ -27,6 +27,10 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
|
|
||||||
class MetadataElasticsearchSource(MetadataSource):
|
class MetadataElasticsearchSource(MetadataSource):
|
||||||
|
"""
|
||||||
|
Metadata Elastic Search Source
|
||||||
|
Used for metadata to ES pipeline
|
||||||
|
"""
|
||||||
|
|
||||||
config: WorkflowSource
|
config: WorkflowSource
|
||||||
report: SourceStatus
|
report: SourceStatus
|
||||||
|
@ -70,17 +70,14 @@ class DatabaseServiceWrapper:
|
|||||||
|
|
||||||
|
|
||||||
class MigrateSource(MetadataSource):
|
class MigrateSource(MetadataSource):
|
||||||
|
"""
|
||||||
|
Metadata Migrate source module
|
||||||
|
to migrate from 0.9 from 0.10
|
||||||
|
"""
|
||||||
|
|
||||||
config: WorkflowSource
|
config: WorkflowSource
|
||||||
report: SourceStatus
|
report: SourceStatus
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
config: WorkflowSource,
|
|
||||||
metadata_config: OpenMetadataConnection,
|
|
||||||
):
|
|
||||||
super().__init__(config, metadata_config)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||||
@ -92,6 +89,9 @@ class MigrateSource(MetadataSource):
|
|||||||
return cls(config, metadata_config)
|
return cls(config, metadata_config)
|
||||||
|
|
||||||
def next_record(self) -> Iterable[Entity]:
|
def next_record(self) -> Iterable[Entity]:
|
||||||
|
"""
|
||||||
|
Fetch all relebvent entities
|
||||||
|
"""
|
||||||
if self.service_connection.includeTables:
|
if self.service_connection.includeTables:
|
||||||
yield from self.fetch_tables(
|
yield from self.fetch_tables(
|
||||||
fields=[
|
fields=[
|
||||||
|
@ -25,17 +25,11 @@ logger = ingestion_logger()
|
|||||||
|
|
||||||
|
|
||||||
class OpenmetadataSource(MetadataSource):
|
class OpenmetadataSource(MetadataSource):
|
||||||
|
"""Metadata source Class"""
|
||||||
|
|
||||||
config: WorkflowSource
|
config: WorkflowSource
|
||||||
report: SourceStatus
|
report: SourceStatus
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
config: WorkflowSource,
|
|
||||||
metadata_config: OpenMetadataConnection,
|
|
||||||
):
|
|
||||||
super().__init__(config, metadata_config)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
|
||||||
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user