mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-03 21:03:48 +00:00
feat-21712: PowerBI internal entities & cross workspace lineage (#21837)
This commit is contained in:
parent
cfb2b4f4c8
commit
db2081cf4a
@ -239,6 +239,7 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
self.workspace_data.append(workspace)
|
self.workspace_data.append(workspace)
|
||||||
self.context.get().workspace = workspace
|
self.context.get().workspace = workspace
|
||||||
self.filtered_dashboards = []
|
self.filtered_dashboards = []
|
||||||
|
self.filtered_datamodels = []
|
||||||
for dashboard in self.get_dashboards_list() or []:
|
for dashboard in self.get_dashboards_list() or []:
|
||||||
dashboard_details = self.get_dashboard_details(dashboard)
|
dashboard_details = self.get_dashboard_details(dashboard)
|
||||||
dashboard_name = self.get_dashboard_name(dashboard_details)
|
dashboard_name = self.get_dashboard_name(dashboard_details)
|
||||||
@ -531,6 +532,7 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
):
|
):
|
||||||
self.status.filter(dataset.name, "Data model filtered out.")
|
self.status.filter(dataset.name, "Data model filtered out.")
|
||||||
continue
|
continue
|
||||||
|
self.filtered_datamodels.append(dataset)
|
||||||
if isinstance(dataset, Dataset):
|
if isinstance(dataset, Dataset):
|
||||||
data_model_type = DataModelType.PowerBIDataModel.value
|
data_model_type = DataModelType.PowerBIDataModel.value
|
||||||
datamodel_columns = self._get_column_info(dataset)
|
datamodel_columns = self._get_column_info(dataset)
|
||||||
@ -605,7 +607,7 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
except Exception as exc: # pylint: disable=broad-except
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
yield Either(
|
yield Either(
|
||||||
left=StackTraceError(
|
left=StackTraceError(
|
||||||
name="Lineage",
|
name="Report and Dashboard Lineage",
|
||||||
error=f"Error to yield report and dashboard lineage details: {exc}",
|
error=f"Error to yield report and dashboard lineage details: {exc}",
|
||||||
stackTrace=traceback.format_exc(),
|
stackTrace=traceback.format_exc(),
|
||||||
)
|
)
|
||||||
@ -613,7 +615,6 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
|
|
||||||
def create_datamodel_report_lineage(
|
def create_datamodel_report_lineage(
|
||||||
self,
|
self,
|
||||||
db_service_name: Optional[str],
|
|
||||||
dashboard_details: PowerBIReport,
|
dashboard_details: PowerBIReport,
|
||||||
) -> Iterable[Either[CreateDashboardRequest]]:
|
) -> Iterable[Either[CreateDashboardRequest]]:
|
||||||
"""
|
"""
|
||||||
@ -630,14 +631,11 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
entity=Dashboard,
|
entity=Dashboard,
|
||||||
fqn=report_fqn,
|
fqn=report_fqn,
|
||||||
)
|
)
|
||||||
|
|
||||||
dataset = self._fetch_dataset_from_workspace(dashboard_details.datasetId)
|
|
||||||
if dataset:
|
|
||||||
datamodel_fqn = fqn.build(
|
datamodel_fqn = fqn.build(
|
||||||
self.metadata,
|
self.metadata,
|
||||||
entity_type=DashboardDataModel,
|
entity_type=DashboardDataModel,
|
||||||
service_name=self.context.get().dashboard_service,
|
service_name=self.context.get().dashboard_service,
|
||||||
data_model_name=dataset.id,
|
data_model_name=dashboard_details.datasetId,
|
||||||
)
|
)
|
||||||
datamodel_entity = self.metadata.get_by_name(
|
datamodel_entity = self.metadata.get_by_name(
|
||||||
entity=DashboardDataModel,
|
entity=DashboardDataModel,
|
||||||
@ -649,26 +647,12 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
to_entity=report_entity, from_entity=datamodel_entity
|
to_entity=report_entity, from_entity=datamodel_entity
|
||||||
)
|
)
|
||||||
|
|
||||||
for table in dataset.tables or []:
|
|
||||||
yield from self._get_table_and_datamodel_lineage(
|
|
||||||
db_service_name=db_service_name,
|
|
||||||
table=table,
|
|
||||||
datamodel_entity=datamodel_entity,
|
|
||||||
)
|
|
||||||
|
|
||||||
# create the lineage between table and datamodel using the pbit files
|
|
||||||
if self.client.file_client:
|
|
||||||
yield from self.create_table_datamodel_lineage_from_files(
|
|
||||||
db_service_name=db_service_name,
|
|
||||||
datamodel_entity=datamodel_entity,
|
|
||||||
)
|
|
||||||
except Exception as exc: # pylint: disable=broad-except
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
yield Either(
|
yield Either(
|
||||||
left=StackTraceError(
|
left=StackTraceError(
|
||||||
name=f"{db_service_name} Report Lineage",
|
name=f"Datamodel and Report Lineage",
|
||||||
error=(
|
error=(
|
||||||
"Error to yield datamodel and report lineage details for DB "
|
f"Error to yield datamodel and report lineage details: {exc}"
|
||||||
f"service name [{db_service_name}]: {exc}"
|
|
||||||
),
|
),
|
||||||
stackTrace=traceback.format_exc(),
|
stackTrace=traceback.format_exc(),
|
||||||
)
|
)
|
||||||
@ -891,7 +875,148 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
name="DataModel Lineage",
|
name="DataModel Lineage",
|
||||||
error=(
|
error=(
|
||||||
"Error to yield datamodel lineage details for DB "
|
"Error to yield datamodel lineage details for DB "
|
||||||
f"service name [{db_service_name}]: {exc}"
|
f"service name [{str(db_service_name)}]: {exc}"
|
||||||
|
),
|
||||||
|
stackTrace=traceback.format_exc(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_dataset_upstream_dataflow_lineage(
|
||||||
|
self, datamodel: Dataset, datamodel_entity: DashboardDataModel
|
||||||
|
) -> Iterable[Either[AddLineageRequest]]:
|
||||||
|
"""
|
||||||
|
Create lineage between dataset and upstreamDataflow
|
||||||
|
"""
|
||||||
|
for upstream_dataflow in datamodel.upstreamDataflows or []:
|
||||||
|
try:
|
||||||
|
if not upstream_dataflow.targetDataflowId:
|
||||||
|
logger.debug(
|
||||||
|
f"No targetDataflowId found for upstreamDataflow in "
|
||||||
|
f"datamodel [{datamodel_entity.name.root}], "
|
||||||
|
f"Moving to next upstreamDataflow"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
upstream_dataflow_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=DashboardDataModel,
|
||||||
|
service_name=self.context.get().dashboard_service,
|
||||||
|
data_model_name=upstream_dataflow.targetDataflowId,
|
||||||
|
)
|
||||||
|
upstream_dataflow_entity = self.metadata.get_by_name(
|
||||||
|
entity=DashboardDataModel,
|
||||||
|
fqn=upstream_dataflow_fqn,
|
||||||
|
)
|
||||||
|
if upstream_dataflow_entity and datamodel_entity:
|
||||||
|
yield self._get_add_lineage_request(
|
||||||
|
from_entity=upstream_dataflow_entity,
|
||||||
|
to_entity=datamodel_entity,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"No upstreamDataflow entity with id={str(upstream_dataflow.targetDataflowId)} "
|
||||||
|
f"found for datamodel [{datamodel_entity.name.root}]"
|
||||||
|
)
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
yield Either(
|
||||||
|
left=StackTraceError(
|
||||||
|
name="Dataset and UpstreamDataflow Lineage",
|
||||||
|
error=(
|
||||||
|
"Error to yield dataset and upstreamDataflow lineage "
|
||||||
|
f"between [{datamodel_entity.name.root}, {str(upstream_dataflow.targetDataflowId)}]: {exc}"
|
||||||
|
),
|
||||||
|
stackTrace=traceback.format_exc(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_dataset_upstream_dataset_lineage(
|
||||||
|
self, datamodel: Dataset, datamodel_entity: DashboardDataModel
|
||||||
|
) -> Iterable[Either[AddLineageRequest]]:
|
||||||
|
"""
|
||||||
|
Create lineage between dataset and upstreamDataset
|
||||||
|
"""
|
||||||
|
for upstream_dataset in datamodel.upstreamDatasets or []:
|
||||||
|
try:
|
||||||
|
if not upstream_dataset.targetDatasetId:
|
||||||
|
logger.debug(
|
||||||
|
f"No targetDatasetId found for upstreamDataset in "
|
||||||
|
f"datamodel [{datamodel_entity.name.root}], "
|
||||||
|
f"Moving to next upstreamDataset"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
upstream_dataset_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=DashboardDataModel,
|
||||||
|
service_name=self.context.get().dashboard_service,
|
||||||
|
data_model_name=upstream_dataset.targetDatasetId,
|
||||||
|
)
|
||||||
|
upstream_dataset_entity = self.metadata.get_by_name(
|
||||||
|
entity=DashboardDataModel,
|
||||||
|
fqn=upstream_dataset_fqn,
|
||||||
|
)
|
||||||
|
if upstream_dataset_entity and datamodel_entity:
|
||||||
|
yield self._get_add_lineage_request(
|
||||||
|
from_entity=upstream_dataset_entity,
|
||||||
|
to_entity=datamodel_entity,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"No upstreamDataset entity with id={str(upstream_dataset.targetDatasetId)} "
|
||||||
|
f"found for datamodel [{datamodel_entity.name.root}]"
|
||||||
|
)
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
yield Either(
|
||||||
|
left=StackTraceError(
|
||||||
|
name="Dataset and UpstreamDataset Lineage",
|
||||||
|
error=(
|
||||||
|
"Error to yield dataset and upstreamDataset lineage "
|
||||||
|
f"between [{datamodel_entity.name.root}, {str(upstream_dataset.targetDatasetId)}]: {exc}"
|
||||||
|
),
|
||||||
|
stackTrace=traceback.format_exc(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def create_dataflow_upstream_dataflow_lineage(
|
||||||
|
self, datamodel: Dataflow, datamodel_entity: DashboardDataModel
|
||||||
|
) -> Iterable[Either[AddLineageRequest]]:
|
||||||
|
"""
|
||||||
|
Create lineage between dataflow and upstreamDataflow
|
||||||
|
"""
|
||||||
|
for upstream_dataflow in datamodel.upstreamDataflows or []:
|
||||||
|
try:
|
||||||
|
if not upstream_dataflow.targetDataflowId:
|
||||||
|
logger.debug(
|
||||||
|
f"No targetDataflowId found for upstreamDataflow in "
|
||||||
|
f"datamodel [{datamodel_entity.name.root}], "
|
||||||
|
f"Moving to next upstreamDataflow"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
upstream_dataflow_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=DashboardDataModel,
|
||||||
|
service_name=self.context.get().dashboard_service,
|
||||||
|
data_model_name=upstream_dataflow.targetDataflowId,
|
||||||
|
)
|
||||||
|
upstream_dataflow_entity = self.metadata.get_by_name(
|
||||||
|
entity=DashboardDataModel,
|
||||||
|
fqn=upstream_dataflow_fqn,
|
||||||
|
)
|
||||||
|
if upstream_dataflow_entity and datamodel_entity:
|
||||||
|
yield self._get_add_lineage_request(
|
||||||
|
from_entity=upstream_dataflow_entity,
|
||||||
|
to_entity=datamodel_entity,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
f"No upstreamDataflow entity with id={str(upstream_dataflow.targetDataflowId)} "
|
||||||
|
f"found for datamodel [{datamodel_entity.name.root}]"
|
||||||
|
)
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
yield Either(
|
||||||
|
left=StackTraceError(
|
||||||
|
name="Dataflow and UpstreamDataflow Lineage",
|
||||||
|
error=(
|
||||||
|
f"Error to yield dataflow and upstreamDataflow lineage "
|
||||||
|
f"between [{datamodel_entity.name.root}, {str(upstream_dataflow.targetDataflowId)}]: {exc}"
|
||||||
),
|
),
|
||||||
stackTrace=traceback.format_exc(),
|
stackTrace=traceback.format_exc(),
|
||||||
)
|
)
|
||||||
@ -911,7 +1036,6 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
try:
|
try:
|
||||||
if isinstance(dashboard_details, PowerBIReport):
|
if isinstance(dashboard_details, PowerBIReport):
|
||||||
yield from self.create_datamodel_report_lineage(
|
yield from self.create_datamodel_report_lineage(
|
||||||
db_service_name=db_service_name,
|
|
||||||
dashboard_details=dashboard_details,
|
dashboard_details=dashboard_details,
|
||||||
)
|
)
|
||||||
if isinstance(dashboard_details, PowerBIDashboard):
|
if isinstance(dashboard_details, PowerBIDashboard):
|
||||||
@ -922,7 +1046,68 @@ class PowerbiSource(DashboardServiceSource):
|
|||||||
yield Either(
|
yield Either(
|
||||||
left=StackTraceError(
|
left=StackTraceError(
|
||||||
name="Dashboard Lineage",
|
name="Dashboard Lineage",
|
||||||
error=f"Error to yield dashboard lineage details for DB service name [{db_service_name}]: {exc}",
|
error=f"Error to yield dashboard lineage details for DB service name [{str(db_service_name)}]: {exc}",
|
||||||
|
stackTrace=traceback.format_exc(),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
Iterate loop for filtered datamodels so datamodels which are not connected to
|
||||||
|
any report but have tables would be eligible for a dataset-db_table lineage.
|
||||||
|
Also create below lineages:
|
||||||
|
1. dataset-db_table
|
||||||
|
2. dataset-upstreamDataflow
|
||||||
|
3. dataset-upstreamDataset
|
||||||
|
4. dataflow-upstreamDataflow
|
||||||
|
"""
|
||||||
|
for datamodel in self.filtered_datamodels or []:
|
||||||
|
try:
|
||||||
|
datamodel_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=DashboardDataModel,
|
||||||
|
service_name=self.context.get().dashboard_service,
|
||||||
|
data_model_name=datamodel.id,
|
||||||
|
)
|
||||||
|
datamodel_entity = self.metadata.get_by_name(
|
||||||
|
entity=DashboardDataModel,
|
||||||
|
fqn=datamodel_fqn,
|
||||||
|
)
|
||||||
|
if datamodel_entity:
|
||||||
|
if isinstance(datamodel, Dataset):
|
||||||
|
# 1. datamodel-db_table lineage
|
||||||
|
for table in datamodel.tables or []:
|
||||||
|
yield from self._get_table_and_datamodel_lineage(
|
||||||
|
db_service_name=db_service_name,
|
||||||
|
table=table,
|
||||||
|
datamodel_entity=datamodel_entity,
|
||||||
|
)
|
||||||
|
# 2. dataset-upstreamDataflow lineage
|
||||||
|
yield from self.create_dataset_upstream_dataflow_lineage(
|
||||||
|
datamodel, datamodel_entity
|
||||||
|
)
|
||||||
|
# 3. dataset-upstreamDataset lineage
|
||||||
|
yield from self.create_dataset_upstream_dataset_lineage(
|
||||||
|
datamodel, datamodel_entity
|
||||||
|
)
|
||||||
|
# create the lineage between table and datamodel using the pbit files
|
||||||
|
if self.client.file_client:
|
||||||
|
yield from self.create_table_datamodel_lineage_from_files(
|
||||||
|
db_service_name=db_service_name,
|
||||||
|
datamodel_entity=datamodel_entity,
|
||||||
|
)
|
||||||
|
elif isinstance(datamodel, Dataflow):
|
||||||
|
# create dataflow-upstreamDataflow lineage
|
||||||
|
yield from self.create_dataflow_upstream_dataflow_lineage(
|
||||||
|
datamodel, datamodel_entity
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Unknown datamodel type: {type(datamodel)}, name: {datamodel.name}"
|
||||||
|
)
|
||||||
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
|
yield Either(
|
||||||
|
left=StackTraceError(
|
||||||
|
name="Datamodel Lineage",
|
||||||
|
error=f"Error to yield datamodel lineage details for DB service name [{str(db_service_name)}]: {exc}",
|
||||||
stackTrace=traceback.format_exc(),
|
stackTrace=traceback.format_exc(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -174,6 +174,16 @@ class DatasetExpression(BaseModel):
|
|||||||
expression: Optional[str] = None
|
expression: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreaDataflow(BaseModel):
|
||||||
|
groupId: Optional[str] = None
|
||||||
|
targetDataflowId: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreaDataset(BaseModel):
|
||||||
|
groupId: Optional[str] = None
|
||||||
|
targetDatasetId: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class Dataset(BaseModel):
|
class Dataset(BaseModel):
|
||||||
"""
|
"""
|
||||||
PowerBI Dataset Model
|
PowerBI Dataset Model
|
||||||
@ -187,6 +197,8 @@ class Dataset(BaseModel):
|
|||||||
users: Optional[List[PowerBIUser]] = []
|
users: Optional[List[PowerBIUser]] = []
|
||||||
expressions: Optional[List[DatasetExpression]] = []
|
expressions: Optional[List[DatasetExpression]] = []
|
||||||
configuredBy: Optional[str] = None
|
configuredBy: Optional[str] = None
|
||||||
|
upstreamDataflows: Optional[List[UpstreaDataflow]] = []
|
||||||
|
upstreamDatasets: Optional[List[UpstreaDataset]] = []
|
||||||
|
|
||||||
|
|
||||||
class DatasetResponse(BaseModel):
|
class DatasetResponse(BaseModel):
|
||||||
@ -205,6 +217,7 @@ class Dataflow(BaseModel):
|
|||||||
description: Optional[str] = None
|
description: Optional[str] = None
|
||||||
users: Optional[List[PowerBIUser]] = []
|
users: Optional[List[PowerBIUser]] = []
|
||||||
modifiedBy: Optional[str] = None
|
modifiedBy: Optional[str] = None
|
||||||
|
upstreamDataflows: Optional[List[UpstreaDataflow]] = []
|
||||||
|
|
||||||
|
|
||||||
class Group(BaseModel):
|
class Group(BaseModel):
|
||||||
|
@ -16,11 +16,14 @@ from metadata.generated.schema.type.entityReferenceList import EntityReferenceLi
|
|||||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||||
from metadata.ingestion.source.dashboard.powerbi.metadata import PowerbiSource
|
from metadata.ingestion.source.dashboard.powerbi.metadata import PowerbiSource
|
||||||
from metadata.ingestion.source.dashboard.powerbi.models import (
|
from metadata.ingestion.source.dashboard.powerbi.models import (
|
||||||
|
Dataflow,
|
||||||
Dataset,
|
Dataset,
|
||||||
PowerBIDashboard,
|
PowerBIDashboard,
|
||||||
PowerBiTable,
|
PowerBiTable,
|
||||||
PowerBITableSource,
|
PowerBITableSource,
|
||||||
|
UpstreaDataflow,
|
||||||
)
|
)
|
||||||
|
from metadata.utils import fqn
|
||||||
|
|
||||||
MOCK_REDSHIFT_EXP = """
|
MOCK_REDSHIFT_EXP = """
|
||||||
let
|
let
|
||||||
@ -191,6 +194,12 @@ MOCK_DASHBOARD_DATA_MODEL = DashboardDataModel(
|
|||||||
columns=[],
|
columns=[],
|
||||||
dataModelType=DataModelType.PowerBIDataModel.value,
|
dataModelType=DataModelType.PowerBIDataModel.value,
|
||||||
)
|
)
|
||||||
|
MOCK_DATAMODEL_ENTITY = DashboardDataModel(
|
||||||
|
name="dummy_dataflow_id_a",
|
||||||
|
id=uuid.uuid4(),
|
||||||
|
dataModelType=DataModelType.PowerBIDataFlow.value,
|
||||||
|
columns=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PowerBIUnitTest(TestCase):
|
class PowerBIUnitTest(TestCase):
|
||||||
@ -383,3 +392,29 @@ class PowerBIUnitTest(TestCase):
|
|||||||
self.assertIsNone(result["database"])
|
self.assertIsNone(result["database"])
|
||||||
self.assertIsNone(result["schema"])
|
self.assertIsNone(result["schema"])
|
||||||
self.assertEqual(result["table"], "CUSTOMER_TABLE")
|
self.assertEqual(result["table"], "CUSTOMER_TABLE")
|
||||||
|
|
||||||
|
@pytest.mark.order(5)
|
||||||
|
@patch.object(OpenMetadata, "get_by_name", return_value=MOCK_DATAMODEL_ENTITY)
|
||||||
|
@patch.object(fqn, "build", return_value=None)
|
||||||
|
def test_upstream_dataflow_lineage(self, *_):
|
||||||
|
MOCK_DATAMODEL_ENTITY_2 = DashboardDataModel(
|
||||||
|
name="dummy_dataflow_id_b",
|
||||||
|
id=uuid.uuid4(),
|
||||||
|
dataModelType=DataModelType.PowerBIDataFlow.value,
|
||||||
|
columns=[],
|
||||||
|
)
|
||||||
|
MOCK_DATAMODEL_2 = Dataflow(
|
||||||
|
name="dataflow_b",
|
||||||
|
objectId="dummy_dataflow_id_b",
|
||||||
|
upstreamDataflows=[
|
||||||
|
UpstreaDataflow(
|
||||||
|
targetDataflowId="dataflow_a",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
lineage_request = list(
|
||||||
|
self.powerbi.create_dataflow_upstream_dataflow_lineage(
|
||||||
|
MOCK_DATAMODEL_2, MOCK_DATAMODEL_ENTITY_2
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert lineage_request[0].right is not None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user