OpenMetadata/ingestion/tests/integration/ometa/test_ometa_lineage_api.py

412 lines
15 KiB
Python

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OpenMetadata high-level API Lineage test
"""
from unittest import TestCase
from _openmetadata_testutils.ometa import int_admin_ometa
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.dashboardDataModel import DashboardDataModel
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.pipeline import Pipeline
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.dashboardService import DashboardService
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.entity.services.pipelineService import PipelineService
from metadata.generated.schema.type.basic import EntityName
from metadata.generated.schema.type.entityLineage import (
ColumnLineage,
EntitiesEdge,
EntityLineage,
LineageDetails,
)
from metadata.generated.schema.type.entityLineage import Source as LineageSource
from metadata.generated.schema.type.entityReference import EntityReference
from ..integration_base import generate_name, get_create_entity, get_create_service
class OMetaLineageTest(TestCase):
    """
    Integration tests for the lineage operations of the OpenMetadata client.

    Run this integration test with the local API available
    Install the ingestion package before running the tests
    """

    service_entity_id = None

    # NOTE: the client is created and health-checked while the class body is
    # evaluated, so an unavailable API fails before any test method runs.
    metadata = int_admin_ometa()

    assert metadata.health_check()

    # Service names are generated per run and reused in tearDownClass to
    # look the services up again.
    db_service_name = generate_name()
    pipeline_service_name = generate_name()
    dashboard_service_name = generate_name()

    db_service = get_create_service(entity=DatabaseService, name=db_service_name)
    pipeline_service = get_create_service(
        entity=PipelineService, name=pipeline_service_name
    )
    dashboard_service = get_create_service(
        entity=DashboardService, name=dashboard_service_name
    )

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare ingredients: three services, a database and schema, two
        tables, a pipeline, a dashboard and a dashboard data model.
        """
        cls.db_service_entity: DatabaseService = cls.metadata.create_or_update(
            data=cls.db_service
        )
        cls.pipeline_service_entity: PipelineService = cls.metadata.create_or_update(
            data=cls.pipeline_service
        )
        cls.dashboard_service_entity: DashboardService = cls.metadata.create_or_update(
            data=cls.dashboard_service
        )

        # Database -> schema hierarchy that holds the test tables
        create_db_entity: Database = cls.metadata.create_or_update(
            data=get_create_entity(
                entity=Database,
                reference=cls.db_service_entity.fullyQualifiedName,
                name=generate_name(),
            )
        )
        cls.create_schema_entity = cls.metadata.create_or_update(
            data=get_create_entity(
                entity=DatabaseSchema,
                reference=create_db_entity.fullyQualifiedName,
                name=generate_name(),
            )
        )

        # Two tables in the same schema: the ends of the table -> table edge
        cls.table1 = get_create_entity(
            name=generate_name(),
            entity=Table,
            reference=cls.create_schema_entity.fullyQualifiedName,
        )
        cls.table1_entity = cls.metadata.create_or_update(data=cls.table1)

        cls.table2 = get_create_entity(
            name=generate_name(),
            entity=Table,
            reference=cls.create_schema_entity.fullyQualifiedName,
        )
        cls.table2_entity = cls.metadata.create_or_update(data=cls.table2)

        cls.pipeline = get_create_entity(
            name=generate_name(),
            entity=Pipeline,
            reference=cls.pipeline_service_entity.fullyQualifiedName,
        )
        cls.pipeline_entity = cls.metadata.create_or_update(data=cls.pipeline)

        cls.dashboard = get_create_entity(
            name=generate_name(),
            entity=Dashboard,
            reference=cls.dashboard_service_entity.fullyQualifiedName,
        )
        cls.dashboard_entity = cls.metadata.create_or_update(data=cls.dashboard)

        cls.dashboard_datamodel = get_create_entity(
            name=generate_name(),
            entity=DashboardDataModel,
            reference=cls.dashboard_service_entity.fullyQualifiedName,
        )
        cls.dashboard_datamodel_entity = cls.metadata.create_or_update(
            data=cls.dashboard_datamodel
        )

    @classmethod
    def tearDownClass(cls) -> None:
        """
        Clean up: recursively hard-delete the services created for this class.
        """
        # Deletion order is unchanged: pipeline, database, dashboard.
        services = (
            (PipelineService, cls.pipeline_service_name),
            (DatabaseService, cls.db_service_name),
            (DashboardService, cls.dashboard_service_name),
        )
        for service_type, service_name in services:
            service = cls.metadata.get_by_name(entity=service_type, fqn=service_name)
            if service is None:
                # Presumably the lookup comes back empty when the service is
                # gone; do not let that block cleanup of the other services.
                continue
            cls.metadata.delete(
                entity=service_type,
                entity_id=str(service.id.root),
                recursive=True,
                hard_delete=True,
            )

    def _table_lineage_request(self, details: LineageDetails) -> AddLineageRequest:
        """
        Build the request for the `table1 -> table2` edge.

        Args:
            details: lineage details to attach to the edge.

        Returns:
            An AddLineageRequest going from table1 to table2.
        """
        return AddLineageRequest(
            edge=EntitiesEdge(
                fromEntity=EntityReference(id=self.table1_entity.id, type="table"),
                toEntity=EntityReference(id=self.table2_entity.id, type="table"),
                lineageDetails=details,
            ),
        )

    def test_create(self):
        """
        We can create a Lineage and get the origin node lineage info back
        """
        from_id = str(self.table1_entity.id.root)
        to_id = str(self.table2_entity.id.root)
        pipeline_id = str(self.pipeline_entity.id.root)
        table1_fqn = self.table1_entity.fullyQualifiedName.root
        table2_fqn = self.table2_entity.fullyQualifiedName.root

        res = self.metadata.add_lineage(
            data=self._table_lineage_request(
                LineageDetails(description="test lineage")
            )
        )

        # Check that we get the origin ID in the entity
        self.assertEqual(from_id, res["entity"]["id"])

        # Check that the toEntity is a node in the origin lineage
        self.assertIn(to_id, [node["id"] for node in res["nodes"]])

        # Add pipeline to the lineage edge
        res = self.metadata.add_lineage(
            data=self._table_lineage_request(
                LineageDetails(
                    description="test lineage",
                    pipeline=EntityReference(
                        id=self.pipeline_entity.id, type="pipeline"
                    ),
                )
            ),
            check_patch=True,
        )

        self.assertEqual(len(res["downstreamEdges"]), 1)
        self.assertEqual(
            res["downstreamEdges"][0]["lineageDetails"]["pipeline"]["id"],
            pipeline_id,
        )

        # Add a column to the lineage edge
        res = self.metadata.add_lineage(
            data=self._table_lineage_request(
                LineageDetails(
                    description="test lineage",
                    columnsLineage=[
                        ColumnLineage(
                            fromColumns=[f"{table1_fqn}.id"],
                            toColumn=f"{table2_fqn}.id",
                        )
                    ],
                )
            ),
            check_patch=True,
        )

        # The pipeline set by the previous request must still be on the edge
        self.assertEqual(len(res["downstreamEdges"]), 1)
        self.assertEqual(
            res["downstreamEdges"][0]["lineageDetails"]["pipeline"]["id"],
            pipeline_id,
        )
        self.assertEqual(
            len(res["downstreamEdges"][0]["lineageDetails"]["columnsLineage"]), 1
        )

        # Add a new column to the lineage edge
        res = self.metadata.add_lineage(
            data=self._table_lineage_request(
                LineageDetails(
                    description="test lineage",
                    columnsLineage=[
                        ColumnLineage(
                            fromColumns=[f"{table1_fqn}.name"],
                            toColumn=f"{table2_fqn}.name",
                        )
                    ],
                )
            ),
            check_patch=True,
        )

        # Both column mappings are expected on the same, single edge
        self.assertEqual(len(res["downstreamEdges"]), 1)
        self.assertEqual(
            res["downstreamEdges"][0]["lineageDetails"]["pipeline"]["id"],
            pipeline_id,
        )
        self.assertEqual(
            len(res["downstreamEdges"][0]["lineageDetails"]["columnsLineage"]), 2
        )

        # We can get lineage by ID
        lineage_id = self.metadata.get_lineage_by_id(
            entity=Table, entity_id=self.table2_entity.id.root
        )
        self.assertEqual(lineage_id["entity"]["id"], to_id)

        # Same thing works if we pass directly the Uuid
        lineage_uuid = self.metadata.get_lineage_by_id(
            entity=Table, entity_id=self.table2_entity.id
        )
        self.assertEqual(lineage_uuid["entity"]["id"], to_id)

        # We can also get lineage by name
        lineage_str = self.metadata.get_lineage_by_name(
            entity=Table, fqn=self.table2_entity.fullyQualifiedName.root
        )
        self.assertEqual(lineage_str["entity"]["id"], to_id)

        # Or passing the FQN
        lineage_fqn = self.metadata.get_lineage_by_name(
            entity=Table, fqn=self.table2_entity.fullyQualifiedName
        )
        self.assertEqual(lineage_fqn["entity"]["id"], to_id)

    def test_delete_by_source(self):
        """
        Test case for deleting lineage by source.

        The test first adds a Manual `table1 -> table2` edge so that it does
        not rely on another test having created it. It then records the number
        of upstream edges of table2, deletes the lineage of table2 by source,
        and asserts that the number of upstream edges has decreased by 1.
        """
        # Without this, the test only passes when test_create ran before it.
        self.metadata.add_lineage(
            data=self._table_lineage_request(
                LineageDetails(
                    description="test lineage", source=LineageSource.Manual
                )
            )
        )

        lineage = self.metadata.get_lineage_by_id(
            entity="table", entity_id=self.table2_entity.id.root
        )
        original_len = len(lineage.get("upstreamEdges") or [])

        self.metadata.delete_lineage_by_source(
            "table", self.table2_entity.id.root, LineageSource.Manual.value
        )

        lineage = self.metadata.get_lineage_by_id(
            entity="table", entity_id=self.table2_entity.id.root
        )
        updated_len = len(lineage.get("upstreamEdges") or [])

        self.assertEqual(updated_len, original_len - 1)

    def test_table_datamodel_lineage(self):
        """We can create and get lineage for a table to a dashboard datamodel"""
        from_id = str(self.table1_entity.id.root)

        res = self.metadata.add_lineage(
            data=AddLineageRequest(
                edge=EntitiesEdge(
                    fromEntity=EntityReference(id=self.table1_entity.id, type="table"),
                    toEntity=EntityReference(
                        id=self.dashboard_datamodel_entity.id,
                        type="dashboardDataModel",
                    ),
                    lineageDetails=LineageDetails(description="test lineage"),
                ),
            )
        )

        # Check that we get the origin ID in the entity
        self.assertEqual(from_id, res["entity"]["id"])

        # use the SDK to get the lineage
        datamodel_lineage = self.metadata.get_lineage_by_name(
            entity=DashboardDataModel,
            fqn=self.dashboard_datamodel_entity.fullyQualifiedName.root,
        )
        entity_lineage = EntityLineage.model_validate(datamodel_lineage)
        self.assertEqual(from_id, str(entity_lineage.upstreamEdges[0].fromEntity.root))

    def test_table_with_slash_in_name(self):
        """E.g., `foo.bar/baz`"""
        name = EntityName("foo.bar/baz")

        new_table: Table = self.metadata.create_or_update(
            data=get_create_entity(
                entity=Table,
                name=name,
                reference=self.create_schema_entity.fullyQualifiedName,
            )
        )

        # The table must be retrievable through its fully qualified name
        res: Table = self.metadata.get_by_name(
            entity=Table, fqn=new_table.fullyQualifiedName
        )
        self.assertEqual(res.name, name)

        self.metadata.add_lineage(
            data=AddLineageRequest(
                edge=EntitiesEdge(
                    fromEntity=EntityReference(id=self.table1_entity.id, type="table"),
                    toEntity=EntityReference(id=new_table.id, type="table"),
                    lineageDetails=LineageDetails(
                        columnsLineage=[
                            ColumnLineage(
                                fromColumns=[
                                    self.table1_entity.columns[0].fullyQualifiedName
                                ],
                                toColumn=new_table.columns[0].fullyQualifiedName,
                            )
                        ]
                    ),
                ),
            )
        )

        # use the SDK to get the lineage
        lineage = self.metadata.get_lineage_by_name(
            entity=Table,
            fqn=new_table.fullyQualifiedName.root,
        )
        entity_lineage = EntityLineage.model_validate(lineage)
        self.assertEqual(
            entity_lineage.upstreamEdges[0].fromEntity.root,
            self.table1_entity.id.root,
        )