# OpenMetadata/ingestion/tests/integration/ometa/test_ometa_pipeline_api.py

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
OpenMetadata high-level API Pipeline test
"""
import uuid
from unittest import TestCase
from datetime import datetime
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
from metadata.generated.schema.api.services.createPipelineService import (
CreatePipelineServiceRequest,
)
from metadata.generated.schema.api.teams.createUser import CreateUserRequest
from metadata.generated.schema.entity.data.pipeline import Pipeline, PipelineStatus, StatusType, Task, TaskStatus
from metadata.generated.schema.entity.services.pipelineService import (
PipelineService,
PipelineServiceType,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
from metadata.utils.helpers import datetime_to_ts
class OMetaPipelineTest(TestCase):
    """
    Integration tests for the Pipeline methods of the OpenMetadata API client.

    Run this integration test with the local API available
    Install the ingestion package before running the tests
    """

    service_entity_id = None

    # NOTE: the statements below live in the class body, so they are executed
    # when the class is defined (i.e. at import time) and talk to the server
    # configured here.
    server_config = MetadataServerConfig(api_endpoint="http://localhost:8585/api")
    metadata = OpenMetadata(server_config)

    assert metadata.health_check()

    # User that test_update assigns as owner of the pipeline.
    user = metadata.create_or_update(
        data=CreateUserRequest(name="random-user", email="random@user.com"),
    )
    owner = EntityReference(id=user.id, type="user")

    # Request used in setUpClass to create the service all pipelines hang from.
    service = CreatePipelineServiceRequest(
        name="test-service-pipeline",
        serviceType=PipelineServiceType.Airflow,
        pipelineUrl="https://localhost:1000",
    )
    service_type = "pipelineService"

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare ingredients: create the pipeline service and build both the
        expected Pipeline entity and the request used to create it.
        """
        cls.service_entity = cls.metadata.create_or_update(data=cls.service)

        # Local reference entity the tests compare server responses against.
        cls.entity = Pipeline(
            id=uuid.uuid4(),
            name="test",
            service=EntityReference(id=cls.service_entity.id, type=cls.service_type),
            fullyQualifiedName="test-service-pipeline.test",
        )

        cls.create = CreatePipelineRequest(
            name="test",
            service=EntityReference(id=cls.service_entity.id, type=cls.service_type),
        )

    @classmethod
    def tearDownClass(cls) -> None:
        """
        Clean up: delete the shared test pipeline and then its service.
        """
        _id = str(
            cls.metadata.get_by_name(
                entity=Pipeline, fqdn="test-service-pipeline.test"
            ).id.__root__
        )

        service_id = str(
            cls.metadata.get_by_name(
                entity=PipelineService, fqdn="test-service-pipeline"
            ).id.__root__
        )

        cls.metadata.delete(entity=Pipeline, entity_id=_id)
        cls.metadata.delete(entity=PipelineService, entity_id=service_id)

    def test_create(self):
        """
        We can create a Pipeline and we receive it back as Entity
        """
        res = self.metadata.create_or_update(data=self.create)

        self.assertEqual(res.name, self.entity.name)
        self.assertEqual(res.service.id, self.entity.service.id)
        self.assertIsNone(res.owner)

    def test_update(self):
        """
        Updating it properly changes its properties
        """
        res_create = self.metadata.create_or_update(data=self.create)

        updated = self.create.dict(exclude_unset=True)
        updated["owner"] = self.owner
        updated_entity = CreatePipelineRequest(**updated)

        res = self.metadata.create_or_update(data=updated_entity)

        # Same ID, updated owner
        self.assertEqual(res.service.id, updated_entity.service.id)
        self.assertEqual(res_create.id, res.id)
        self.assertEqual(res.owner.id, self.user.id)

    def test_get_name(self):
        """
        We can fetch a Pipeline by name and get it back as Entity
        """
        self.metadata.create_or_update(data=self.create)

        res = self.metadata.get_by_name(
            entity=Pipeline, fqdn=self.entity.fullyQualifiedName
        )
        self.assertEqual(res.name, self.entity.name)

    def test_get_id(self):
        """
        We can fetch a Pipeline by ID and get it back as Entity
        """
        self.metadata.create_or_update(data=self.create)

        # First pick up by name
        res_name = self.metadata.get_by_name(
            entity=Pipeline, fqdn=self.entity.fullyQualifiedName
        )
        # Then fetch by ID
        res = self.metadata.get_by_id(entity=Pipeline, entity_id=res_name.id)

        self.assertEqual(res_name.id, res.id)

    def test_list(self):
        """
        We can list all our Pipelines
        """
        self.metadata.create_or_update(data=self.create)

        res = self.metadata.list_entities(entity=Pipeline, limit=100)

        # Fetch our test Pipeline. We have already inserted it, so we should find it
        data = next(
            iter(ent for ent in res.entities if ent.name == self.entity.name), None
        )
        self.assertIsNotNone(data)

    def test_delete(self):
        """
        We can delete a Pipeline by ID
        """
        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Pipeline, fqdn=self.entity.fullyQualifiedName
        )
        # Then fetch by ID
        res_id = self.metadata.get_by_id(
            entity=Pipeline, entity_id=str(res_name.id.__root__)
        )

        # Delete
        self.metadata.delete(entity=Pipeline, entity_id=str(res_id.id.__root__))

        # Then we should not find it
        res = self.metadata.list_entities(entity=Pipeline)
        self.assertFalse(
            any(
                ent.fullyQualifiedName == self.entity.fullyQualifiedName
                for ent in res.entities
            )
        )

    def test_add_status(self):
        """
        We can add status data
        """
        create_pipeline = CreatePipelineRequest(
            name="pipeline-test",
            service=EntityReference(id=self.service_entity.id, type=self.service_type),
            tasks=[
                Task(name="task1"),
                Task(name="task2"),
            ],
        )

        pipeline = self.metadata.create_or_update(data=create_pipeline)
        # Registered up front so the entity is removed even if an assertion fails.
        self.addCleanup(self.metadata.delete, entity=Pipeline, entity_id=pipeline.id)

        execution_ts = datetime_to_ts(datetime.strptime("2021-03-07", "%Y-%m-%d"))

        updated = self.metadata.add_pipeline_status(
            pipeline=pipeline,
            status=PipelineStatus(
                executionDate=execution_ts,
                executionStatus=StatusType.Successful,
                taskStatus=[
                    TaskStatus(name="task1", executionStatus=StatusType.Successful),
                ],
            ),
        )

        # We get a list of status
        self.assertEqual(updated.pipelineStatus[0].executionDate.__root__, execution_ts)
        self.assertEqual(len(updated.pipelineStatus[0].taskStatus), 1)

        # Check that we can update a given status properly
        updated = self.metadata.add_pipeline_status(
            pipeline=pipeline,
            status=PipelineStatus(
                executionDate=execution_ts,
                executionStatus=StatusType.Successful,
                taskStatus=[
                    TaskStatus(name="task1", executionStatus=StatusType.Successful),
                    TaskStatus(name="task2", executionStatus=StatusType.Successful),
                ],
            ),
        )

        self.assertEqual(updated.pipelineStatus[0].executionDate.__root__, execution_ts)
        self.assertEqual(len(updated.pipelineStatus[0].taskStatus), 2)

    def test_add_tasks(self):
        """
        Check the add task logic
        """
        create_pipeline = CreatePipelineRequest(
            name="pipeline-test",
            service=EntityReference(id=self.service_entity.id, type=self.service_type),
            tasks=[
                Task(name="task1"),
                Task(name="task2"),
            ],
        )

        pipeline = self.metadata.create_or_update(data=create_pipeline)
        # Registered up front so the entity is removed even if an assertion fails.
        self.addCleanup(self.metadata.delete, entity=Pipeline, entity_id=pipeline.id)

        # Add new tasks
        updated_pipeline = self.metadata.add_task_to_pipeline(
            pipeline,
            Task(name="task3"),
        )

        self.assertEqual(len(updated_pipeline.tasks), 3)

        # Update a task already added
        updated_pipeline = self.metadata.add_task_to_pipeline(
            pipeline,
            Task(name="task3", displayName="TaskDisplay"),
        )

        self.assertEqual(len(updated_pipeline.tasks), 3)
        # any() reports a clean failure when no task matches, where a bare
        # next() without default would raise StopIteration instead.
        self.assertTrue(
            any(task.displayName == "TaskDisplay" for task in updated_pipeline.tasks)
        )

        # Add more than one task at a time
        new_tasks = [
            Task(name="task3"),
            Task(name="task4"),
        ]
        updated_pipeline = self.metadata.add_task_to_pipeline(pipeline, *new_tasks)

        self.assertEqual(len(updated_pipeline.tasks), 4)

    def test_add_tasks_to_empty_pipeline(self):
        """
        We can add tasks to a pipeline without tasks
        """
        pipeline = self.metadata.create_or_update(data=self.create)

        updated_pipeline = self.metadata.add_task_to_pipeline(
            pipeline,
            Task(name="task", displayName="TaskDisplay"),
        )

        self.assertEqual(len(updated_pipeline.tasks), 1)

    def test_clean_tasks(self):
        """
        Check that we can remove Pipeline tasks
        if they are not part of the list arg
        """
        create_pipeline = CreatePipelineRequest(
            name="pipeline-test",
            service=EntityReference(id=self.service_entity.id, type=self.service_type),
            tasks=[
                Task(name="task1"),
                Task(name="task2"),
                Task(name="task3"),
                Task(name="task4"),
            ],
        )

        pipeline = self.metadata.create_or_update(data=create_pipeline)
        # Registered up front so the entity is removed even if an assertion fails.
        self.addCleanup(self.metadata.delete, entity=Pipeline, entity_id=pipeline.id)

        updated_pipeline = self.metadata.clean_pipeline_tasks(
            pipeline=pipeline,
            tasks=[
                Task(name="task3"),
                Task(name="task4"),
            ],
        )

        self.assertEqual(len(updated_pipeline.tasks), 2)
        self.assertEqual(
            {task.name for task in updated_pipeline.tasks}, {"task3", "task4"}
        )

    def test_list_versions(self):
        """
        test list pipeline entity versions
        """
        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Pipeline, fqdn=self.entity.fullyQualifiedName
        )

        res = self.metadata.get_list_entity_versions(
            entity=Pipeline, entity_id=res_name.id.__root__
        )
        self.assertTrue(res)

    def test_get_entity_version(self):
        """
        test get pipeline entity version
        """
        self.metadata.create_or_update(data=self.create)

        # Find by name
        res_name = self.metadata.get_by_name(
            entity=Pipeline, fqdn=self.entity.fullyQualifiedName
        )
        res = self.metadata.get_entity_version(
            entity=Pipeline, entity_id=res_name.id.__root__, version=0.1
        )

        # check we get the correct version requested and the correct entity ID
        self.assertEqual(res.version.__root__, 0.1)
        self.assertEqual(res.id, res_name.id)

    def test_get_entity_ref(self):
        """
        test get EntityReference
        """
        res = self.metadata.create_or_update(data=self.create)
        entity_ref = self.metadata.get_entity_reference(
            entity=Pipeline, fqdn=res.fullyQualifiedName
        )

        self.assertEqual(res.id, entity_ref.id)