2021-12-01 12:46:28 +05:30
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2021-10-28 20:31:38 +02:00
|
|
|
"""
|
|
|
|
OpenMetadata high-level API Lineage test
|
|
|
|
"""
|
|
|
|
from unittest import TestCase
|
|
|
|
|
2022-02-01 01:29:56 +01:00
|
|
|
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
2022-04-05 21:20:39 +02:00
|
|
|
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
|
|
|
CreateDatabaseSchemaRequest,
|
|
|
|
)
|
2022-02-01 01:29:56 +01:00
|
|
|
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
|
|
|
|
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
|
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
2021-10-28 20:31:38 +02:00
|
|
|
from metadata.generated.schema.api.services.createDatabaseService import (
|
2022-02-01 01:29:56 +01:00
|
|
|
CreateDatabaseServiceRequest,
|
2021-10-28 20:31:38 +02:00
|
|
|
)
|
|
|
|
from metadata.generated.schema.api.services.createPipelineService import (
|
2022-02-01 01:29:56 +01:00
|
|
|
CreatePipelineServiceRequest,
|
2021-10-28 20:31:38 +02:00
|
|
|
)
|
2022-04-05 21:20:39 +02:00
|
|
|
from metadata.generated.schema.entity.data.table import Column, DataType
|
2023-06-16 13:18:12 +05:30
|
|
|
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
|
|
|
|
BasicAuth,
|
|
|
|
)
|
2022-04-05 21:20:39 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
|
|
|
|
MysqlConnection,
|
|
|
|
)
|
2022-04-12 23:40:21 -07:00
|
|
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
|
|
|
OpenMetadataConnection,
|
|
|
|
)
|
2022-05-25 08:35:16 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
|
|
|
|
AirflowConnection,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.entity.services.connections.pipeline.backendConnection import (
|
|
|
|
BackendConnection,
|
|
|
|
)
|
2021-10-28 20:31:38 +02:00
|
|
|
from metadata.generated.schema.entity.services.databaseService import (
|
2022-01-21 22:06:14 -08:00
|
|
|
DatabaseConnection,
|
2021-10-28 20:31:38 +02:00
|
|
|
DatabaseService,
|
|
|
|
DatabaseServiceType,
|
|
|
|
)
|
|
|
|
from metadata.generated.schema.entity.services.pipelineService import (
|
2022-05-25 08:35:16 +02:00
|
|
|
PipelineConnection,
|
2021-10-28 20:31:38 +02:00
|
|
|
PipelineService,
|
|
|
|
PipelineServiceType,
|
|
|
|
)
|
2022-09-26 16:19:47 +05:30
|
|
|
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
|
|
|
OpenMetadataJWTClientConfig,
|
|
|
|
)
|
2024-04-23 09:37:43 +05:30
|
|
|
from metadata.generated.schema.type.entityLineage import (
|
|
|
|
ColumnLineage,
|
|
|
|
EntitiesEdge,
|
|
|
|
LineageDetails,
|
|
|
|
)
|
2021-10-28 20:31:38 +02:00
|
|
|
from metadata.generated.schema.type.entityReference import EntityReference
|
|
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
|
|
|
|
|
|
|
|
|
|
class OMetaLineageTest(TestCase):
    """
    Run this integration test with the local API available
    Install the ingestion package before running the tests

    The class body itself builds the OpenMetadata client and asserts the
    server health check, so those statements run when the class is defined
    (i.e. when the module is imported), before any fixture is executed.
    """

    # Not referenced anywhere else in this class.
    service_entity_id = None

    # Connection settings for the API expected at localhost:8585.
    # pylint: disable=line-too-long
    server_config = OpenMetadataConnection(
        hostPort="http://localhost:8585/api",
        authProvider="openmetadata",
        securityConfig=OpenMetadataJWTClientConfig(
            jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
        ),
    )
    metadata = OpenMetadata(server_config)

    # Evaluated at class-definition time: fail early if the API is not up.
    assert metadata.health_check()

    # Request used in setUpClass to register the database service that owns
    # the test tables.
    db_service = CreateDatabaseServiceRequest(
        name="test-service-db-lineage",
        serviceType=DatabaseServiceType.Mysql,
        connection=DatabaseConnection(
            config=MysqlConnection(
                username="username",
                authType=BasicAuth(
                    password="password",
                ),
                hostPort="http://localhost:1234",
            )
        ),
    )

    # Request used in setUpClass to register the pipeline service that owns
    # the test pipeline.
    pipeline_service = CreatePipelineServiceRequest(
        name="test-service-pipeline-lineage",
        serviceType=PipelineServiceType.Airflow,
        connection=PipelineConnection(
            config=AirflowConnection(
                hostPort="http://localhost:8080",
                connection=BackendConnection(),
            ),
        ),
    )

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare ingredients

        Creates, in order: both services, a database, a schema, two tables
        with ``id`` and ``name`` columns, and a pipeline. Finally stores in
        ``cls.create`` the table1 -> table2 lineage request used by the test.
        """
        cls.db_service_entity = cls.metadata.create_or_update(data=cls.db_service)
        cls.pipeline_service_entity = cls.metadata.create_or_update(
            data=cls.pipeline_service
        )

        # Each entity is addressed through the fullyQualifiedName of the
        # parent entity returned by the previous create_or_update call.
        database_request = CreateDatabaseRequest(
            name="test-db",
            service=cls.db_service_entity.fullyQualifiedName,
        )
        database_entity = cls.metadata.create_or_update(data=database_request)

        schema_request = CreateDatabaseSchemaRequest(
            name="test-schema",
            database=database_entity.fullyQualifiedName,
        )
        schema_entity = cls.metadata.create_or_update(data=schema_request)

        cls.table1 = CreateTableRequest(
            name="table1",
            databaseSchema=schema_entity.fullyQualifiedName,
            columns=[
                Column(name="id", dataType=DataType.BIGINT),
                Column(name="name", dataType=DataType.STRING),
            ],
        )
        cls.table1_entity = cls.metadata.create_or_update(data=cls.table1)

        cls.table2 = CreateTableRequest(
            name="table2",
            databaseSchema=schema_entity.fullyQualifiedName,
            columns=[
                Column(name="id", dataType=DataType.BIGINT),
                Column(name="name", dataType=DataType.STRING),
            ],
        )
        cls.table2_entity = cls.metadata.create_or_update(data=cls.table2)

        cls.pipeline = CreatePipelineRequest(
            name="test",
            service=cls.pipeline_service_entity.fullyQualifiedName,
        )
        cls.pipeline_entity = cls.metadata.create_or_update(data=cls.pipeline)

        # Plain table1 -> table2 edge: description only, no pipeline and no
        # column-level lineage.
        cls.create = AddLineageRequest(
            edge=EntitiesEdge(
                fromEntity=EntityReference(id=cls.table1_entity.id, type="table"),
                toEntity=EntityReference(id=cls.table2_entity.id, type="table"),
                lineageDetails=LineageDetails(description="test lineage"),
            ),
        )

    @classmethod
    def tearDownClass(cls) -> None:
        """
        Clean up

        Looks both services up by name and hard-deletes them recursively,
        pipeline service first, then database service.
        """
        db_service_id = str(
            cls.metadata.get_by_name(
                entity=DatabaseService, fqn="test-service-db-lineage"
            ).id.__root__
        )
        pipeline_service_id = str(
            cls.metadata.get_by_name(
                entity=PipelineService, fqn="test-service-pipeline-lineage"
            ).id.__root__
        )

        cls.metadata.delete(
            entity=PipelineService,
            entity_id=pipeline_service_id,
            recursive=True,
            hard_delete=True,
        )
        cls.metadata.delete(
            entity=DatabaseService,
            entity_id=db_service_id,
            recursive=True,
            hard_delete=True,
        )

    def _patch_table_lineage(self, details: LineageDetails):
        """
        Send a table1 -> table2 lineage request carrying ``details`` with
        ``check_patch=True`` and return whatever ``add_lineage`` returns.
        """
        request = AddLineageRequest(
            edge=EntitiesEdge(
                fromEntity=EntityReference(id=self.table1_entity.id, type="table"),
                toEntity=EntityReference(id=self.table2_entity.id, type="table"),
                lineageDetails=details,
            ),
        )
        return self.metadata.add_lineage(data=request, check_patch=True)

    def _assert_single_pipeline_edge(self, res, expected_columns=None) -> None:
        """
        Assert ``res`` has exactly one downstream edge that references the
        test pipeline and, when ``expected_columns`` is given, that many
        ``columnsLineage`` entries.
        """
        self.assertEqual(len(res["downstreamEdges"]), 1)

        details = res["downstreamEdges"][0]["lineageDetails"]
        self.assertEqual(
            details["pipeline"]["id"],
            str(self.pipeline_entity.id.__root__),
        )
        if expected_columns is not None:
            self.assertEqual(len(details["columnsLineage"]), expected_columns)

    def test_create(self):
        """
        We can create a Lineage and get the origin node lineage info back
        """
        from_id = str(self.table1_entity.id.__root__)
        to_id = str(self.table2_entity.id.__root__)
        table1_fqn = self.table1_entity.fullyQualifiedName.__root__
        table2_fqn = self.table2_entity.fullyQualifiedName.__root__

        res = self.metadata.add_lineage(data=self.create)

        # Check that we get the origin ID in the entity
        self.assertEqual(res["entity"]["id"], from_id)

        # Check that the toEntity is a node in the origin lineage
        self.assertIn(to_id, [node["id"] for node in res["nodes"]])

        # Add Pipeline to the lineage edge
        res = self._patch_table_lineage(
            LineageDetails(
                description="test lineage",
                pipeline=EntityReference(id=self.pipeline_entity.id, type="pipeline"),
            )
        )
        self._assert_single_pipeline_edge(res)

        # Add a column to the lineage edge. The request carries no pipeline;
        # the assertions expect the pipeline set above to still be on the edge.
        res = self._patch_table_lineage(
            LineageDetails(
                description="test lineage",
                columnsLineage=[
                    ColumnLineage(
                        fromColumns=[f"{table1_fqn}.id"],
                        toColumn=f"{table2_fqn}.id",
                    )
                ],
            )
        )
        self._assert_single_pipeline_edge(res, expected_columns=1)

        # Add a new column to the lineage edge. Only the `name` mapping is
        # sent; the assertions expect it alongside the earlier `id` mapping.
        res = self._patch_table_lineage(
            LineageDetails(
                description="test lineage",
                columnsLineage=[
                    ColumnLineage(
                        fromColumns=[f"{table1_fqn}.name"],
                        toColumn=f"{table2_fqn}.name",
                    )
                ],
            )
        )
        self._assert_single_pipeline_edge(res, expected_columns=2)
|