mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-02 19:48:17 +00:00
parent
519b5b3a6b
commit
273b3e9c4b
@ -609,6 +609,7 @@ class MetadataRestSink(Sink[Entity]):
|
|||||||
f"{db_schema.name.__root__}.{to_table_name}",
|
f"{db_schema.name.__root__}.{to_table_name}",
|
||||||
db.service.name,
|
db.service.name,
|
||||||
db_schema_and_table.database.name.__root__,
|
db_schema_and_table.database.name.__root__,
|
||||||
|
db_schema_and_table.table.viewDefinition.__root__,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Failed to create view lineage")
|
logger.error("Failed to create view lineage")
|
||||||
|
|||||||
@ -30,7 +30,6 @@ from metadata.generated.schema.entity.services.connections.metadata.openMetadata
|
|||||||
OpenMetadataConnection,
|
OpenMetadataConnection,
|
||||||
)
|
)
|
||||||
from metadata.generated.schema.entity.services.dashboardService import (
|
from metadata.generated.schema.entity.services.dashboardService import (
|
||||||
DashboardService,
|
|
||||||
DashboardServiceType,
|
DashboardServiceType,
|
||||||
)
|
)
|
||||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||||
@ -38,11 +37,10 @@ from metadata.generated.schema.metadataIngestion.workflow import (
|
|||||||
)
|
)
|
||||||
from metadata.generated.schema.type.entityLineage import EntitiesEdge
|
from metadata.generated.schema.type.entityLineage import EntitiesEdge
|
||||||
from metadata.generated.schema.type.entityReference import EntityReference
|
from metadata.generated.schema.type.entityReference import EntityReference
|
||||||
from metadata.ingestion.api.common import Entity
|
|
||||||
from metadata.ingestion.api.source import InvalidSourceException, SourceStatus
|
from metadata.ingestion.api.source import InvalidSourceException, SourceStatus
|
||||||
from metadata.ingestion.models.table_metadata import Chart, Dashboard, DashboardOwner
|
from metadata.ingestion.models.table_metadata import Chart, Dashboard, DashboardOwner
|
||||||
from metadata.ingestion.source.dashboard.dashboard_source import DashboardSourceService
|
from metadata.ingestion.source.dashboard.dashboard_source import DashboardSourceService
|
||||||
from metadata.utils.fqn import FQN_SEPARATOR
|
from metadata.utils import fqn
|
||||||
from metadata.utils.logger import ingestion_logger
|
from metadata.utils.logger import ingestion_logger
|
||||||
|
|
||||||
logger = ingestion_logger()
|
logger = ingestion_logger()
|
||||||
@ -106,46 +104,6 @@ def get_owners(owners_obj):
|
|||||||
return owners
|
return owners
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-return-statements, too-many-branches
|
|
||||||
def get_service_type_from_database_uri(uri: str) -> str:
|
|
||||||
"""
|
|
||||||
Get service type from database URI
|
|
||||||
|
|
||||||
Args:
|
|
||||||
uri (str):
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str
|
|
||||||
"""
|
|
||||||
if uri.startswith("bigquery"):
|
|
||||||
return "bigquery"
|
|
||||||
if uri.startswith("druid"):
|
|
||||||
return "druid"
|
|
||||||
if uri.startswith("mssql"):
|
|
||||||
return "mssql"
|
|
||||||
if uri.startswith("jdbc:postgres:") and uri.index("redshift.amazonaws") > 0:
|
|
||||||
return "redshift"
|
|
||||||
if uri.startswith("snowflake"):
|
|
||||||
return "snowflake"
|
|
||||||
if uri.startswith("presto"):
|
|
||||||
return "presto"
|
|
||||||
if uri.startswith("trino"):
|
|
||||||
return "trino"
|
|
||||||
if uri.startswith("postgresql"):
|
|
||||||
return "postgres"
|
|
||||||
if uri.startswith("pinot"):
|
|
||||||
return "pinot"
|
|
||||||
if uri.startswith("oracle"):
|
|
||||||
return "oracle"
|
|
||||||
if uri.startswith("mysql"):
|
|
||||||
return "mysql"
|
|
||||||
if uri.startswith("mongodb"):
|
|
||||||
return "mongodb"
|
|
||||||
if uri.startswith("hive"):
|
|
||||||
return "hive"
|
|
||||||
return "external"
|
|
||||||
|
|
||||||
|
|
||||||
class SupersetSource(DashboardSourceService):
|
class SupersetSource(DashboardSourceService):
|
||||||
"""
|
"""
|
||||||
Superset source class
|
Superset source class
|
||||||
@ -187,6 +145,22 @@ class SupersetSource(DashboardSourceService):
|
|||||||
)
|
)
|
||||||
return cls(config, metadata_config)
|
return cls(config, metadata_config)
|
||||||
|
|
||||||
|
def prepare(self):
|
||||||
|
"""
|
||||||
|
Fetching all charts available in superset
|
||||||
|
this step is done because fetch_total_charts api fetches all
|
||||||
|
the required information which is not available in fetch_charts_with_id api
|
||||||
|
"""
|
||||||
|
self.all_charts = {}
|
||||||
|
current_page = 0
|
||||||
|
page_size = 25
|
||||||
|
total_charts = self.client.fetch_total_charts()
|
||||||
|
while current_page * page_size <= total_charts:
|
||||||
|
charts = self.client.fetch_charts(current_page, page_size)
|
||||||
|
current_page += 1
|
||||||
|
for i in range(len(charts["result"])):
|
||||||
|
self.all_charts[charts["ids"][i]] = charts["result"][i]
|
||||||
|
|
||||||
def get_dashboards_list(self) -> Optional[List[object]]:
|
def get_dashboards_list(self) -> Optional[List[object]]:
|
||||||
"""
|
"""
|
||||||
Get List of all dashboards
|
Get List of all dashboards
|
||||||
@ -204,7 +178,7 @@ class SupersetSource(DashboardSourceService):
|
|||||||
"""
|
"""
|
||||||
Get Dashboard Name
|
Get Dashboard Name
|
||||||
"""
|
"""
|
||||||
return dashboard_details["id"]
|
return dashboard_details["dashboard_title"]
|
||||||
|
|
||||||
def get_dashboard_details(self, dashboard: dict) -> dict:
|
def get_dashboard_details(self, dashboard: dict) -> dict:
|
||||||
"""
|
"""
|
||||||
@ -216,7 +190,7 @@ class SupersetSource(DashboardSourceService):
|
|||||||
"""
|
"""
|
||||||
Method to Get Dashboard Entity
|
Method to Get Dashboard Entity
|
||||||
"""
|
"""
|
||||||
self.fetch_dashboard_charts(dashboard_details)
|
yield from self.fetch_dashboard_charts(dashboard_details)
|
||||||
last_modified = (
|
last_modified = (
|
||||||
dateparser.parse(dashboard_details.get("changed_on_utc", "now")).timestamp()
|
dateparser.parse(dashboard_details.get("changed_on_utc", "now")).timestamp()
|
||||||
* 1000
|
* 1000
|
||||||
@ -232,81 +206,94 @@ class SupersetSource(DashboardSourceService):
|
|||||||
lastModified=last_modified,
|
lastModified=last_modified,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _get_charts_of_dashboard(self, dashboard_details: dict) -> List[str]:
|
||||||
|
"""
|
||||||
|
Method to fetch chart ids linked to dashboard
|
||||||
|
"""
|
||||||
|
raw_position_data = dashboard_details.get("position_json", {})
|
||||||
|
if raw_position_data:
|
||||||
|
position_data = json.loads(raw_position_data)
|
||||||
|
return [
|
||||||
|
value.get("meta", {}).get("chartId", "unknown")
|
||||||
|
for key, value in position_data.items()
|
||||||
|
if key.startswith("CHART-")
|
||||||
|
]
|
||||||
|
return []
|
||||||
|
|
||||||
def get_lineage(self, dashboard_details: dict) -> Optional[AddLineageRequest]:
|
def get_lineage(self, dashboard_details: dict) -> Optional[AddLineageRequest]:
|
||||||
"""
|
"""
|
||||||
Get lineage between dashboard and data sources
|
Get lineage between dashboard and data sources
|
||||||
"""
|
"""
|
||||||
logger.info("Lineage not implemented for superset")
|
for chart_id in self._get_charts_of_dashboard(dashboard_details):
|
||||||
return None
|
chart_json = self.all_charts.get(chart_id)
|
||||||
|
datasource_fqn = (
|
||||||
|
self._get_datasource_fqn(chart_json.get("datasource_id"))
|
||||||
|
if chart_json.get("datasource_id")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
if not datasource_fqn:
|
||||||
|
continue
|
||||||
|
from_entity = self.metadata.get_by_name(
|
||||||
|
entity=Table,
|
||||||
|
fqn=datasource_fqn,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
dashboard_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=Lineage_Dashboard,
|
||||||
|
service_name=self.config.serviceName,
|
||||||
|
dashboard_name=str(dashboard_details["id"]),
|
||||||
|
)
|
||||||
|
to_entity = self.metadata.get_by_name(
|
||||||
|
entity=Lineage_Dashboard,
|
||||||
|
fqn=dashboard_fqn,
|
||||||
|
)
|
||||||
|
if from_entity and to_entity:
|
||||||
|
lineage = AddLineageRequest(
|
||||||
|
edge=EntitiesEdge(
|
||||||
|
fromEntity=EntityReference(
|
||||||
|
id=from_entity.id.__root__, type="table"
|
||||||
|
),
|
||||||
|
toEntity=EntityReference(
|
||||||
|
id=to_entity.id.__root__, type="dashboard"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
yield lineage
|
||||||
|
|
||||||
|
except Exception as err:
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.error(err)
|
||||||
|
|
||||||
def fetch_dashboard_charts(self, dashboard_details: dict) -> None:
|
def fetch_dashboard_charts(self, dashboard_details: dict) -> None:
|
||||||
"""
|
"""
|
||||||
Metod to fetch charts linked to dashboard
|
Metod to fetch charts linked to dashboard
|
||||||
"""
|
"""
|
||||||
raw_position_data = dashboard_details.get("position_json", "{}")
|
|
||||||
self.charts = []
|
self.charts = []
|
||||||
if raw_position_data is not None:
|
for chart_id in self._get_charts_of_dashboard(dashboard_details):
|
||||||
position_data = json.loads(raw_position_data)
|
yield from self._build_chart(self.all_charts.get(chart_id))
|
||||||
for key, value in position_data.items():
|
self.charts.append(chart_id)
|
||||||
if not key.startswith("CHART-"):
|
|
||||||
continue
|
|
||||||
chart_id = value.get("meta", {}).get("chartId", "unknown")
|
|
||||||
self.charts.append(chart_id)
|
|
||||||
|
|
||||||
def _get_service_type_from_database_id(self, database_id):
|
def _get_datasource_fqn(self, datasource_id: str) -> Optional[str]:
|
||||||
database_json = self.client.fetch_database(database_id)
|
if not self.source_config.dbServiceName:
|
||||||
sqlalchemy_uri = database_json.get("result", {}).get("sqlalchemy_uri")
|
return
|
||||||
return get_service_type_from_database_uri(sqlalchemy_uri)
|
try:
|
||||||
|
datasource_json = self.client.fetch_datasource(datasource_id)
|
||||||
def _get_datasource_from_id(self, datasource_id):
|
database_json = self.client.fetch_database(
|
||||||
datasource_json = self.client.fetch_datasource(datasource_id)
|
datasource_json["result"]["database"]["id"]
|
||||||
schema_name = datasource_json.get("result", {}).get("schema")
|
)
|
||||||
table_name = datasource_json.get("result", {}).get("table_name")
|
dataset_fqn = fqn.build(
|
||||||
database_id = datasource_json.get("result", {}).get("database", {}).get("id")
|
self.metadata,
|
||||||
database_name = (
|
entity_type=Table,
|
||||||
datasource_json.get("result", {}).get("database", {}).get("database_name")
|
table_name=datasource_json["result"]["table_name"],
|
||||||
)
|
schema_name=datasource_json["result"]["schema"],
|
||||||
|
database_name=database_json["result"]["parameters"]["database"],
|
||||||
if database_id and table_name:
|
service_name=self.source_config.dbServiceName,
|
||||||
platform = self._get_service_type_from_database_id(database_id)
|
|
||||||
dataset_fqn = (
|
|
||||||
f"{platform}{FQN_SEPARATOR}{database_name + FQN_SEPARATOR if database_name else ''}"
|
|
||||||
f"{schema_name + FQN_SEPARATOR if schema_name else ''}"
|
|
||||||
f"{table_name}"
|
|
||||||
)
|
)
|
||||||
return dataset_fqn
|
return dataset_fqn
|
||||||
return None
|
except KeyError:
|
||||||
|
logger.warning(f"Failed to fetch Datasource with id: {datasource_id}")
|
||||||
def _check_lineage(self, chart_id, datasource_text):
|
return None
|
||||||
if datasource_text and hasattr(self.service_connection, "dbServiceName"):
|
|
||||||
chart_data = self.client.fetch_charts_with_id(chart_id)
|
|
||||||
dashboards = chart_data["result"].get("dashboards")
|
|
||||||
for dashboard in dashboards:
|
|
||||||
try:
|
|
||||||
from_entity = self.metadata.get_by_name(
|
|
||||||
entity=Table,
|
|
||||||
fqn=f"{self.service_connection.dbServiceName}.{datasource_text}",
|
|
||||||
)
|
|
||||||
to_entity = self.metadata.get_by_name(
|
|
||||||
entity=Lineage_Dashboard,
|
|
||||||
fqn=f"{self.config.serviceName}.{dashboard['id']}",
|
|
||||||
)
|
|
||||||
if from_entity and to_entity:
|
|
||||||
lineage = AddLineageRequest(
|
|
||||||
edge=EntitiesEdge(
|
|
||||||
fromEntity=EntityReference(
|
|
||||||
id=from_entity.id.__root__, type="table"
|
|
||||||
),
|
|
||||||
toEntity=EntityReference(
|
|
||||||
id=to_entity.id.__root__, type="dashboard"
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
yield lineage
|
|
||||||
|
|
||||||
except Exception as err:
|
|
||||||
logger.debug(traceback.format_exc())
|
|
||||||
logger.error(err)
|
|
||||||
|
|
||||||
# pylint: disable=too-many-locals
|
# pylint: disable=too-many-locals
|
||||||
def _build_chart(self, chart_json: dict) -> Chart:
|
def _build_chart(self, chart_json: dict) -> Chart:
|
||||||
@ -339,10 +326,11 @@ class SupersetSource(DashboardSourceService):
|
|||||||
chart_type=chart_json["viz_type"],
|
chart_type=chart_json["viz_type"],
|
||||||
url=chart_json["url"],
|
url=chart_json["url"],
|
||||||
owners=get_owners(chart_json["owners"]),
|
owners=get_owners(chart_json["owners"]),
|
||||||
datasource_fqn=self._get_datasource_from_id(chart_json["datasource_id"]),
|
datasource_fqn=self._get_datasource_fqn(chart_json["datasource_id"])
|
||||||
|
if chart_json["datasource_id"]
|
||||||
|
else None,
|
||||||
lastModified=last_modified,
|
lastModified=last_modified,
|
||||||
service=EntityReference(id=self.service.id, type="dashboardService"),
|
service=EntityReference(id=self.service.id, type="dashboardService"),
|
||||||
custom_props=custom_properties,
|
custom_props=custom_properties,
|
||||||
)
|
)
|
||||||
yield from self._check_lineage(chart_id, chart_json.get("datasource_name_text"))
|
|
||||||
yield chart
|
yield chart
|
||||||
|
|||||||
@ -130,7 +130,7 @@ def _create_lineage_by_table_name(
|
|||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
service_name=service_name,
|
service_name=service_name,
|
||||||
)
|
)
|
||||||
to_raw_name = get_formatted_entity_name(str(from_table))
|
to_raw_name = get_formatted_entity_name(str(to_table))
|
||||||
to_table_obj = split_raw_table_name(database=database, raw_name=to_raw_name)
|
to_table_obj = split_raw_table_name(database=database, raw_name=to_raw_name)
|
||||||
to_entities = search_table_entities(
|
to_entities = search_table_entities(
|
||||||
table=to_table_obj.get("table"),
|
table=to_table_obj.get("table"),
|
||||||
@ -154,22 +154,23 @@ def _create_lineage_by_table_name(
|
|||||||
lineage_details = LineageDetails(
|
lineage_details = LineageDetails(
|
||||||
sqlQuery=query, columnsLineage=col_lineage
|
sqlQuery=query, columnsLineage=col_lineage
|
||||||
)
|
)
|
||||||
lineage = AddLineageRequest(
|
if from_entity and to_entity:
|
||||||
edge=EntitiesEdge(
|
lineage = AddLineageRequest(
|
||||||
fromEntity=EntityReference(
|
edge=EntitiesEdge(
|
||||||
id=from_entity.id.__root__,
|
fromEntity=EntityReference(
|
||||||
type="table",
|
id=from_entity.id.__root__,
|
||||||
),
|
type="table",
|
||||||
toEntity=EntityReference(
|
),
|
||||||
id=to_entity.id.__root__,
|
toEntity=EntityReference(
|
||||||
type="table",
|
id=to_entity.id.__root__,
|
||||||
),
|
type="table",
|
||||||
|
),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
if lineage_details:
|
||||||
if lineage_details:
|
lineage.edge.lineageDetails = lineage_details
|
||||||
lineage.edge.lineageDetails = lineage_details
|
created_lineage = metadata.add_lineage(lineage)
|
||||||
created_lineage = metadata.add_lineage(lineage)
|
logger.info(f"Successfully added Lineage {created_lineage}")
|
||||||
logger.info(f"Successfully added Lineage {created_lineage}")
|
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user