Mirror of https://github.com/open-metadata/OpenMetadata.git (synced 2025-08-24 08:58:06 +00:00)
parent 41d0c182cb
commit 75ccc803ad
@@ -205,7 +205,7 @@ class DashboardServiceSource(TopologyRunnerMixin, Source, ABC):
         Yields lineage if config is enabled
         """
         if self.source_config.dbServiceName:
-            yield from self.yield_dashboard_lineage_details(dashboard_details)
+            yield from self.yield_dashboard_lineage_details(dashboard_details) or []

     def yield_tag(self, *args, **kwargs) -> Optional[Iterable[OMetaTagAndCategory]]:
         """
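One note on the `or []` guard above: `yield_dashboard_lineage_details` can return `None` when a connector has no lineage to emit, and `yield from None` raises a `TypeError`. A minimal, runnable sketch of the pattern, with illustrative names that are not part of this commit:

    from typing import Iterable, Optional


    def maybe_lineage(enabled: bool) -> Optional[Iterable[str]]:
        # Stands in for a connector hook that may return None instead of a generator
        if not enabled:
            return None
        return (edge for edge in ["table_a -> dashboard", "table_b -> dashboard"])


    def run(enabled: bool) -> Iterable[str]:
        # `or []` converts a None result into an empty iterable before yield from
        yield from maybe_lineage(enabled) or []


    print(list(run(True)))   # ['table_a -> dashboard', 'table_b -> dashboard']
    print(list(run(False)))  # []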
@@ -243,40 +243,6 @@ class GlueSource(Source[Entity]):

         return None

-    def get_downstream_tasks(self, task_unique_id, tasks):
-        downstream_tasks = []
-        for edges in tasks["Edges"]:
-            if (
-                edges["SourceId"] == task_unique_id
-                and edges["DestinationId"] in self.task_id_mapping.values()
-            ):
-                downstream_tasks.append(
-                    list(self.task_id_mapping.keys())[
-                        list(self.task_id_mapping.values()).index(
-                            edges["DestinationId"]
-                        )
-                    ][:128]
-                )
-        return downstream_tasks
-
-    def get_tasks(self, tasks):
-        task_list = []
-        for task in tasks["Graph"]["Nodes"]:
-            task_name = task["Name"][:128]
-            self.task_id_mapping[task_name] = task["UniqueId"]
-        for task in tasks["Graph"]["Nodes"]:
-            task_list.append(
-                Task(
-                    name=task["Name"],
-                    displayName=task["Name"],
-                    taskType=task["Type"],
-                    downstreamTasks=self.get_downstream_tasks(
-                        task["UniqueId"], tasks["Graph"]
-                    ),
-                )
-            )
-        return task_list
-
     def close(self):
         pass
@@ -11,14 +11,14 @@
 """
 Airbyte source to extract metadata
 """
-import traceback
-from dataclasses import dataclass, field
-from typing import Iterable, List, Optional
+from typing import Iterable, Optional
+
+from pydantic import BaseModel

 from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
 from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
 from metadata.generated.schema.entity.data.pipeline import (
-    Pipeline,
     PipelineStatus,
     StatusType,
     Task,
@@ -32,39 +32,21 @@ from metadata.generated.schema.entity.services.connections.pipeline.airbyteConnection import (
     AirbyteConnection,
 )
 from metadata.generated.schema.entity.services.databaseService import DatabaseService
-from metadata.generated.schema.entity.services.pipelineService import PipelineService
-from metadata.generated.schema.metadataIngestion.pipelineServiceMetadataPipeline import (
-    PipelineServiceMetadataPipeline,
-)
 from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.type.entityLineage import EntitiesEdge
 from metadata.generated.schema.type.entityReference import EntityReference
-from metadata.ingestion.api.common import Entity
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
+from metadata.ingestion.api.source import InvalidSourceException
 from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
-from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
 from metadata.utils import fqn
 from metadata.utils.airbyte_client import AirbyteClient
-from metadata.utils.filters import filter_by_pipeline
 from metadata.utils.logger import ingestion_logger

 logger = ingestion_logger()

-
-@dataclass
-class AirbyteSourceStatus(SourceStatus):
-    pipelines_scanned: List[str] = field(default_factory=list)
-    filtered: List[str] = field(default_factory=list)
-
-    def pipeline_scanned(self, topic: str) -> None:
-        self.pipelines_scanned.append(topic)
-
-    def dropped(self, topic: str) -> None:
-        self.filtered.append(topic)
-
-
 STATUS_MAP = {
     "cancelled": StatusType.Failed,
     "succeeded": StatusType.Successful,
@@ -75,33 +57,27 @@ STATUS_MAP = {
 }


-class AirbyteSource(Source[CreatePipelineRequest]):
+class AirbytePipelineDetails(BaseModel):
+    """
+    Wrapper Class to combine the workspace with connection
+    """
+
+    workspace: dict
+    connection: dict
+
+
+class AirbyteSource(PipelineServiceSource):
     """
     Implements the necessary methods ot extract
     Pipeline metadata from Airflow's metadata db
     """

-    config: WorkflowSource
-    report: AirbyteSourceStatus
-
     def __init__(
         self,
         config: WorkflowSource,
         metadata_config: OpenMetadataConnection,
     ):
-        super().__init__()
-        self.config = config
-        self.source_config: PipelineServiceMetadataPipeline = (
-            self.config.sourceConfig.config
-        )
-        self.service_connection = self.config.serviceConnection.__root__.config
-        self.metadata_config = metadata_config
-        self.metadata = OpenMetadata(self.metadata_config)
-        self.status = AirbyteSourceStatus()
-        self.service: PipelineService = self.metadata.get_service_or_create(
-            entity=PipelineService, config=config
-        )
-
+        super().__init__(config, metadata_config)
         self.client = AirbyteClient(self.service_connection)

     @classmethod
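For reference, a small runnable sketch (not from the diff) of how the new `AirbytePipelineDetails` wrapper behaves: pydantic checks that both fields are dicts, and each item later produced by `get_pipelines_list` carries its workspace and connection together:

    from pydantic import BaseModel


    class AirbytePipelineDetails(BaseModel):
        # Same shape as the wrapper defined in this diff
        workspace: dict
        connection: dict


    # Hypothetical sample values, shaped like the Airbyte API payloads
    details = AirbytePipelineDetails(
        workspace={"workspaceId": "ws-1"},
        connection={"connectionId": "conn-1", "name": "MSSQL <> Postgres"},
    )
    print(details.connection.get("connectionId"))  # conn-1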
@@ -114,9 +90,6 @@ class AirbyteSource(Source[CreatePipelineRequest]):
         )
         return cls(config, metadata_config)

-    def prepare(self):
-        pass
-
     def get_connections_jobs(self, connection: dict, connection_url: str):
         """
         Returns the list of tasks linked to connection
@@ -130,46 +103,51 @@ class AirbyteSource(Source[CreatePipelineRequest]):
             )
         ]

-    def fetch_pipeline(
-        self, connection: dict, workspace: dict
+    def yield_pipeline(
+        self, pipeline_details: AirbytePipelineDetails
     ) -> Iterable[CreatePipelineRequest]:
         """
         Convert a Connection into a Pipeline Entity
-        :param connection: connection object from airbyte
-        :param connection: workspace object from airbyte
+        :param pipeline_details: pipeline_details object from airbyte
         :return: Create Pipeline request with tasks
         """
-        connection_url = f"/workspaces/{workspace.get('workspaceId')}/connections/{connection.get('connectionId')}"
+        connection_url = f"/workspaces/{pipeline_details.workspace.get('workspaceId')}/connections/{pipeline_details.connection.get('connectionId')}"
         yield CreatePipelineRequest(
-            name=connection.get("connectionId"),
-            displayName=connection.get("name"),
+            name=pipeline_details.connection.get("connectionId"),
+            displayName=pipeline_details.connection.get("name"),
             description="",
             pipelineUrl=connection_url,
-            tasks=self.get_connections_jobs(connection, connection_url),
-            service=EntityReference(id=self.service.id, type="pipelineService"),
+            tasks=self.get_connections_jobs(
+                pipeline_details.connection, connection_url
+            ),
+            service=EntityReference(
+                id=self.context.pipeline_service.id.__root__, type="pipelineService"
+            ),
         )

-    def fetch_pipeline_status(
-        self, workspace: dict, connection: dict, pipeline_fqn: str
-    ) -> OMetaPipelineStatus:
+    def yield_pipeline_status(
+        self, pipeline_details: AirbytePipelineDetails
+    ) -> Optional[OMetaPipelineStatus]:
         """
         Method to get task & pipeline status
         """

         # Airbyte does not offer specific attempt link, just at pipeline level
         log_link = (
-            f"{self.service_connection.hostPort}/workspaces/{workspace.get('workspaceId')}/connections/"
-            f"{connection.get('connectionId')}/status"
+            f"{self.service_connection.hostPort}/workspaces/{pipeline_details.workspace.get('workspaceId')}/connections/"
+            f"{pipeline_details.connection.get('connectionId')}/status"
         )

-        for job in self.client.list_jobs(connection.get("connectionId")):
+        for job in self.client.list_jobs(
+            pipeline_details.connection.get("connectionId")
+        ):
             if not job or not job.get("attempts"):
                 continue
             for attempt in job["attempts"]:

                 task_status = [
                     TaskStatus(
-                        name=str(connection.get("connectionId")),
+                        name=str(pipeline_details.connection.get("connectionId")),
                         executionStatus=STATUS_MAP.get(
                             attempt["status"].lower(), StatusType.Pending
                         ).value,
@@ -186,21 +164,23 @@ class AirbyteSource(Source[CreatePipelineRequest]):
                     executionDate=attempt["createdAt"],
                 )
                 yield OMetaPipelineStatus(
-                    pipeline_fqn=pipeline_fqn, pipeline_status=pipeline_status
+                    pipeline_fqn=self.context.pipeline.fullyQualifiedName.__root__,
+                    pipeline_status=pipeline_status,
                 )

-    def fetch_lineage(
-        self, connection: dict, pipeline_entity: Pipeline
+    def yield_pipeline_lineage_details(
+        self, pipeline_details: AirbytePipelineDetails
     ) -> Optional[Iterable[AddLineageRequest]]:
         """
         Parse all the stream available in the connection and create a lineage between them
-        :param connection: connection object from airbyte
-        :param pipeline_entity: Pipeline we just ingested
+        :param pipeline_details: pipeline_details object from airbyte
         :return: Lineage from inlets and outlets
         """
-        source_connection = self.client.get_source(connection.get("sourceId"))
+        source_connection = self.client.get_source(
+            pipeline_details.connection.get("sourceId")
+        )
         destination_connection = self.client.get_destination(
-            connection.get("destinationId")
+            pipeline_details.connection.get("destinationId")
         )
         source_service = self.metadata.get_by_name(
             entity=DatabaseService, fqn=source_connection.get("name")
@@ -211,7 +191,9 @@ class AirbyteSource(Source[CreatePipelineRequest]):
         if not source_service or not destination_service:
             return

-        for task in connection.get("syncCatalog", {}).get("streams") or []:
+        for task in (
+            pipeline_details.connection.get("syncCatalog", {}).get("streams") or []
+        ):
             stream = task.get("stream")
             from_fqn = fqn.build(
                 self.metadata,
@@ -239,57 +221,32 @@ class AirbyteSource(Source[CreatePipelineRequest]):
             yield AddLineageRequest(
                 edge=EntitiesEdge(
                     fromEntity=EntityReference(id=from_entity.id, type="table"),
-                    toEntity=EntityReference(id=pipeline_entity.id, type="pipeline"),
+                    toEntity=EntityReference(
+                        id=self.context.pipeline.id.__root__, type="pipeline"
+                    ),
                 )
             )
             yield AddLineageRequest(
                 edge=EntitiesEdge(
                     toEntity=EntityReference(id=to_entity.id, type="table"),
-                    fromEntity=EntityReference(id=pipeline_entity.id, type="pipeline"),
+                    fromEntity=EntityReference(
+                        id=self.context.pipeline.id.__root__, type="pipeline"
+                    ),
                 )
             )

-    def next_record(self) -> Iterable[Entity]:
+    def get_pipelines_list(self) -> Iterable[AirbytePipelineDetails]:
         """
-        Extract metadata information to create Pipelines with Tasks
+        Get List of all pipelines
         """
         for workspace in self.client.list_workspaces():
             for connection in self.client.list_connections(
                 workflow_id=workspace.get("workspaceId")
             ):
-                try:
-                    if filter_by_pipeline(
-                        self.source_config.pipelineFilterPattern,
-                        connection.get("connectionId"),
-                    ):
-                        continue
-                    yield from self.fetch_pipeline(connection, workspace)
-                    pipeline_fqn = fqn.build(
-                        self.metadata,
-                        entity_type=Pipeline,
-                        service_name=self.service.name.__root__,
-                        pipeline_name=connection.get("connectionId"),
-                    )
-                    yield from self.fetch_pipeline_status(
-                        workspace, connection, pipeline_fqn
-                    )
-                    if self.source_config.includeLineage:
-                        pipeline_entity: Pipeline = self.metadata.get_by_name(
-                            entity=Pipeline,
-                            fqn=pipeline_fqn,
-                        )
-                        yield from self.fetch_lineage(connection, pipeline_entity) or []
-
-                except Exception as err:
-                    logger.error(repr(err))
-                    logger.debug(traceback.format_exc())
-                    self.status.failure(connection.get("connectionId"), repr(err))
-
-    def get_status(self):
-        return self.status
-
-    def close(self):
-        pass
-
-    def test_connection(self) -> None:
-        pass
+                yield AirbytePipelineDetails(workspace=workspace, connection=connection)
+
+    def get_pipeline_name(self, pipeline_details: AirbytePipelineDetails) -> str:
+        """
+        Get Pipeline Name
+        """
+        return pipeline_details.connection.get("connectionId")
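Worth noting in the hunk above: the refactor can drop the hand-built `fqn.build(...)` and the `metadata.get_by_name` round trip because later stages read the pipeline created by `yield_pipeline` from the shared topology context. A hedged illustration of that access pattern, using `SimpleNamespace` stand-ins rather than the real context objects:

    from types import SimpleNamespace

    # Illustrative only: the real context is built by create_source_context()
    context = SimpleNamespace(
        pipeline=SimpleNamespace(
            fullyQualifiedName=SimpleNamespace(__root__="airbyte_source.conn-1")
        )
    )
    print(context.pipeline.fullyQualifiedName.__root__)  # airbyte_source.conn-1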
@@ -11,15 +11,12 @@
 """
 Airflow source to extract metadata from OM UI
 """
-import sys
 import traceback
 from collections.abc import Iterable
-from dataclasses import dataclass, field
 from typing import Any, Iterable, List, Optional, cast

 from airflow.models import BaseOperator, DagRun
 from airflow.models.serialized_dag import SerializedDagModel
-from airflow.models.taskinstance import TaskInstance
 from airflow.serialization.serialized_objects import SerializedDAG
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session
@@ -40,44 +37,24 @@ from metadata.generated.schema.entity.services.connections.metadata.openMetadata
 from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
     AirflowConnection,
 )
-from metadata.generated.schema.entity.services.pipelineService import PipelineService
-from metadata.generated.schema.metadataIngestion.pipelineServiceMetadataPipeline import (
-    PipelineServiceMetadataPipeline,
-)
 from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.type.entityLineage import EntitiesEdge
 from metadata.generated.schema.type.entityReference import EntityReference
-from metadata.ingestion.api.common import Entity
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
+from metadata.ingestion.api.source import InvalidSourceException
 from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
-from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.utils import fqn
+from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
 from metadata.utils.connections import (
     create_and_bind_session,
     get_connection,
     test_connection,
 )
-from metadata.utils.filters import filter_by_pipeline
 from metadata.utils.helpers import datetime_to_ts
 from metadata.utils.logger import ingestion_logger

 logger = ingestion_logger()

-
-@dataclass
-class AirflowSourceStatus(SourceStatus):
-    pipelines_scanned: List[str] = field(default_factory=list)
-    filtered: List[str] = field(default_factory=list)
-
-    def pipeline_scanned(self, topic: str) -> None:
-        self.pipelines_scanned.append(topic)
-
-    def dropped(self, topic: str) -> None:
-        self.filtered.append(topic)
-
-
 STATUS_MAP = {
     "success": StatusType.Successful.value,
     "failed": StatusType.Failed.value,
@@ -85,38 +62,24 @@ STATUS_MAP = {
 }


-class AirflowSource(Source[CreatePipelineRequest]):
+class AirflowSource(PipelineServiceSource):
     """
     Implements the necessary methods ot extract
     Pipeline metadata from Airflow's metadata db
     """

     config: WorkflowSource
-    report: AirflowSourceStatus

     def __init__(
         self,
         config: WorkflowSource,
         metadata_config: OpenMetadataConnection,
     ):
-        super().__init__()
-        self.config = config
-        self.source_config: PipelineServiceMetadataPipeline = (
-            self.config.sourceConfig.config
-        )
-        self.service_connection = self.config.serviceConnection.__root__.config
-        self.metadata_config = metadata_config
-        self.metadata = OpenMetadata(self.metadata_config)
-        self.status = AirflowSourceStatus()
-        self.service: PipelineService = self.metadata.get_service_or_create(
-            entity=PipelineService, config=config
-        )
-        self.numberOfStatus = (
-            self.config.serviceConnection.__root__.config.numberOfStatus
-        )
-        # Create the connection to the database
         self._session = None
+        self.service_connection = config.serviceConnection.__root__.config
         self.engine: Engine = get_connection(self.service_connection.connection)
+        super().__init__(config, metadata_config)
+        # Create the connection to the database

     @classmethod
     def create(cls, config_dict, metadata_config: OpenMetadataConnection):
@@ -138,22 +101,20 @@ class AirflowSource(Source[CreatePipelineRequest]):

         return self._session

-    def prepare(self):
-        pass
-
     def get_pipeline_status(self, dag_id: str) -> DagRun:
         dag_run_list: DagRun = (
             self.session.query(DagRun)
             .filter(DagRun.dag_id == dag_id)
             .order_by(DagRun.execution_date.desc())
-            .limit(self.numberOfStatus)
+            .limit(self.config.serviceConnection.__root__.config.numberOfStatus)
         )
         return dag_run_list

-    def fetch_pipeline_status(
-        self, serialized_dag: SerializedDAG, pipeline_fqn: str
+    def yield_pipeline_status(
+        self, pipeline_details: SerializedDAG
     ) -> OMetaPipelineStatus:
-        dag_run_list = self.get_pipeline_status(serialized_dag.dag_id)
+        dag_run_list = self.get_pipeline_status(pipeline_details.dag_id)

         for dag in dag_run_list:
             if isinstance(dag.task_instances, Iterable):
                 tasks = dag.task_instances
@@ -179,10 +140,11 @@ class AirflowSource(Source[CreatePipelineRequest]):
                     executionDate=dag.execution_date.timestamp(),
                 )
                 yield OMetaPipelineStatus(
-                    pipeline_fqn=pipeline_fqn, pipeline_status=pipeline_status
+                    pipeline_fqn=self.context.pipeline.fullyQualifiedName.__root__,
+                    pipeline_status=pipeline_status,
                 )

-    def list_dags(self) -> Iterable[SerializedDagModel]:
+    def get_pipelines_list(self) -> Iterable[SerializedDagModel]:
         """
         List all DAGs from the metadata db.
@@ -192,6 +154,12 @@ class AirflowSource(Source[CreatePipelineRequest]):
         for serialized_dag in self.session.query(SerializedDagModel).all():
             yield serialized_dag

+    def get_pipeline_name(self, pipeline_details: SerializedDAG) -> str:
+        """
+        Get Pipeline Name
+        """
+        return pipeline_details.dag_id
+
     @staticmethod
     def get_tasks_from_dag(dag: SerializedDAG) -> List[Task]:
         """
@@ -213,25 +181,26 @@ class AirflowSource(Source[CreatePipelineRequest]):
             for task in cast(Iterable[BaseOperator], dag.tasks)
         ]

-    def fetch_pipeline(
-        self, serialized_dag: SerializedDagModel
+    def yield_pipeline(
+        self, pipeline_details: SerializedDagModel
     ) -> Iterable[CreatePipelineRequest]:
         """
         Convert a DAG into a Pipeline Entity
         :param serialized_dag: SerializedDAG from airflow metadata DB
         :return: Create Pipeline request with tasks
         """
-        dag: SerializedDAG = serialized_dag.dag
+        dag: SerializedDAG = pipeline_details.dag

         yield CreatePipelineRequest(
-            name=serialized_dag.dag_id,
+            name=pipeline_details.dag_id,
             description=dag.description,
             pipelineUrl=f"/tree?dag_id={dag.dag_id}",  # Just the suffix
             concurrency=dag.concurrency,
-            pipelineLocation=serialized_dag.fileloc,
+            pipelineLocation=pipeline_details.fileloc,
             startDate=dag.start_date.isoformat() if dag.start_date else None,
             tasks=self.get_tasks_from_dag(dag),
-            service=EntityReference(id=self.service.id, type="pipelineService"),
+            service=EntityReference(
+                id=self.context.pipeline_service.id.__root__, type="pipelineService"
+            ),
         )

     @staticmethod
@@ -280,16 +249,15 @@ class AirflowSource(Source[CreatePipelineRequest]):
             logger.warn(f"Error trying to parse outlets - {err}")
             return None

-    def fetch_lineage(
-        self, serialized_dag: SerializedDagModel, pipeline_entity: Pipeline
-    ) -> Iterable[AddLineageRequest]:
+    def yield_pipeline_lineage_details(
+        self, pipeline_details: SerializedDagModel
+    ) -> Optional[Iterable[AddLineageRequest]]:
         """
         Parse xlets and add lineage between Pipelines and Tables
-        :param serialized_dag: SerializedDAG from airflow metadata DB
-        :param pipeline_entity: Pipeline we just ingested
+        :param pipeline_details: SerializedDAG from airflow metadata DB
         :return: Lineage from inlets and outlets
         """
-        dag: SerializedDAG = serialized_dag.dag
+        dag: SerializedDAG = pipeline_details.dag

         for task in dag.tasks:
             for table_fqn in self.get_inlets(task) or []:
@@ -303,14 +271,14 @@ class AirflowSource(Source[CreatePipelineRequest]):
                                     id=table_entity.id, type="table"
                                 ),
                                 toEntity=EntityReference(
-                                    id=pipeline_entity.id, type="pipeline"
+                                    id=self.context.pipeline.id.__root__, type="pipeline"
                                 ),
                             )
                         )
                 else:
                     logger.warn(
                         f"Could not find Table [{table_fqn}] from "
-                        f"[{pipeline_entity.fullyQualifiedName.__root__}] inlets"
+                        f"[{self.context.pipeline.fullyQualifiedName.__root__}] inlets"
                     )

             for table_fqn in self.get_outlets(task) or []:
@@ -321,7 +289,7 @@ class AirflowSource(Source[CreatePipelineRequest]):
                     yield AddLineageRequest(
                         edge=EntitiesEdge(
                             fromEntity=EntityReference(
-                                id=pipeline_entity.id, type="pipeline"
+                                id=self.context.pipeline.id.__root__, type="pipeline"
                             ),
                             toEntity=EntityReference(id=table_entity.id, type="table"),
                         )
@@ -329,45 +297,9 @@ class AirflowSource(Source[CreatePipelineRequest]):
                 else:
                     logger.warn(
                         f"Could not find Table [{table_fqn}] from "
-                        f"[{pipeline_entity.fullyQualifiedName.__root__}] outlets"
+                        f"[{self.context.pipeline.fullyQualifiedName.__root__}] outlets"
                     )

-    def next_record(self) -> Iterable[Entity]:
-        """
-        Extract metadata information to create Pipelines with Tasks
-        and lineage
-        """
-        for serialized_dag in self.list_dags():
-            try:
-                if not filter_by_pipeline(
-                    self.source_config.pipelineFilterPattern, serialized_dag.dag_id
-                ):
-                    yield from self.fetch_pipeline(serialized_dag)
-                    pipeline_fqn = fqn.build(
-                        self.metadata,
-                        entity_type=Pipeline,
-                        service_name=self.service.name.__root__,
-                        pipeline_name=serialized_dag.dag_id,
-                    )
-                    yield from self.fetch_pipeline_status(serialized_dag, pipeline_fqn)
-                    if self.source_config.includeLineage:
-                        pipeline_entity: Pipeline = self.metadata.get_by_name(
-                            entity=Pipeline,
-                            fqn=pipeline_fqn,
-                        )
-                        yield from self.fetch_lineage(serialized_dag, pipeline_entity)
-                else:
-                    self.status.dropped(serialized_dag.dag_id)
-
-            except Exception as err:
-                logger.error(repr(err))
-                logger.debug(traceback.format_exc())
-                logger.debug(sys.exc_info()[2])
-                self.status.failure(serialized_dag.dag_id, repr(err))
-
-    def get_status(self):
-        return self.status
-
     def close(self):
         self.session.close()
@@ -8,35 +8,30 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import traceback
-import uuid
-from typing import Iterable
+from typing import Any, Iterable, Optional

-from metadata.generated.schema.entity.data.pipeline import Pipeline, Task
+from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
+from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
+from metadata.generated.schema.entity.data.pipeline import (
+    PipelineStatus,
+    StatusType,
+    Task,
+    TaskStatus,
+)
 from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
     OpenMetadataConnection,
 )
 from metadata.generated.schema.entity.services.connections.pipeline.glueConnection import (
     GlueConnection,
 )
-from metadata.generated.schema.entity.services.connections.pipeline.glueConnection import (
-    GlueConnection as GluePipelineConnection,
-)
-from metadata.generated.schema.entity.services.pipelineService import (
-    PipelineConnection,
-    PipelineService,
-)
 from metadata.generated.schema.metadataIngestion.workflow import (
     Source as WorkflowSource,
 )
 from metadata.generated.schema.type.entityReference import EntityReference
-from metadata.ingestion.api.common import Entity
-from metadata.ingestion.api.source import InvalidSourceException, Source, SourceStatus
-from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
-from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.ingestion.source.database.common_db_source import SQLSourceStatus
-from metadata.utils.connections import get_connection, test_connection
+from metadata.ingestion.api.source import InvalidSourceException
+from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
+from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
 from metadata.utils.logger import ingestion_logger

 logger = ingestion_logger()
@@ -44,31 +39,22 @@ logger = ingestion_logger()
 GRAPH = "Graph"
 NODES = "Nodes"
 NAME = "Name"
+JOB_TYPE = "JOB"
+STATUS_MAP = {
+    "cancelled": StatusType.Failed,
+    "succeeded": StatusType.Successful,
+    "failed": StatusType.Failed,
+    "running": StatusType.Pending,
+    "incomplete": StatusType.Failed,
+    "pending": StatusType.Pending,
+}


-class GlueSource(Source[Entity]):
+class GlueSource(PipelineServiceSource):
     def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
-        super().__init__()
-        self.status = SQLSourceStatus()
-        self.config = config
-        self.metadata_config = metadata_config
-        self.metadata = OpenMetadata(metadata_config)
-
-        self.service_connection = self.config.serviceConnection.__root__.config
-        self.pipeline_service = self.metadata.get_service_or_create(
-            entity=PipelineService,
-            config=WorkflowSource(
-                type="glue",
-                serviceName=self.config.serviceName,
-                serviceConnection=PipelineConnection(
-                    config=GluePipelineConnection(
-                        awsConfig=self.service_connection.awsConfig
-                    ),
-                ),
-                sourceConfig={},
-            ),
-        )
-        self.connection = get_connection(self.service_connection)
+        super().__init__(config, metadata_config)
+        self.task_id_mapping = {}
+        self.job_name_list = set()
         self.glue = self.connection.client

     @classmethod
@@ -81,55 +67,101 @@ class GlueSource(Source[Entity]):
         )
         return cls(config, metadata_config)

-    def prepare(self):
-        pass
+    def get_pipelines_list(self) -> Iterable[dict]:
+        """
+        Get List of all pipelines
+        """
+        for workflow in self.glue.list_workflows()["Workflows"]:
+            jobs = self.glue.get_workflow(Name=workflow, IncludeGraph=True)["Workflow"]
+            yield jobs

-    def next_record(self) -> Iterable[Entity]:
+    def get_pipeline_name(self, pipeline_details: dict) -> str:
+        """
+        Get Pipeline Name
+        """
+        return pipeline_details[NAME]

-        yield from self.ingest_pipelines()
+    def yield_pipeline(self, pipeline_details: Any) -> Iterable[CreatePipelineRequest]:
+        """
+        Method to Get Pipeline Entity
+        """
+        self.job_name_list = set()
+        pipeline_ev = CreatePipelineRequest(
+            name=pipeline_details[NAME],
+            displayName=pipeline_details[NAME],
+            description="",
+            tasks=self.get_tasks(pipeline_details),
+            service=EntityReference(
+                id=self.context.pipeline_service.id.__root__, type="pipelineService"
+            ),
+        )
+        yield pipeline_ev

-    def get_tasks(self, tasks):
+    def get_tasks(self, pipeline_details: Any) -> Task:
         task_list = []
-        for task in tasks[GRAPH][NODES]:
+        for task in pipeline_details["Graph"]["Nodes"]:
+            self.task_id_mapping[task["UniqueId"]] = task["Name"][:128]
+            if task["Type"] == JOB_TYPE:
+                self.job_name_list.add(task[NAME])
+        for task in pipeline_details[GRAPH][NODES]:
             task_list.append(
                 Task(
                     name=task[NAME],
                     displayName=task[NAME],
                     taskType=task["Type"],
                     downstreamTasks=self.get_downstream_tasks(
-                        task["UniqueId"], tasks[GRAPH]
+                        task["UniqueId"], pipeline_details[GRAPH]
                     ),
                 )
             )
         return task_list

-    def ingest_pipelines(self) -> Iterable[OMetaDatabaseAndTable]:
+    def get_downstream_tasks(self, task_unique_id, tasks):
+        downstream_tasks = []
+        for edges in tasks["Edges"]:
+            if edges["SourceId"] == task_unique_id and self.task_id_mapping.get(
+                edges["DestinationId"]
+            ):
+                downstream_tasks.append(self.task_id_mapping[edges["DestinationId"]])
+        return downstream_tasks
+
+    def yield_pipeline_status(
+        self, pipeline_details: Any
+    ) -> Iterable[OMetaPipelineStatus]:
+        for job in self.job_name_list:
             try:
-                for workflow in self.glue.list_workflows()["Workflows"]:
-                    jobs = self.glue.get_workflow(Name=workflow, IncludeGraph=True)[
-                        "Workflow"
-                    ]
-                    tasks = self.get_tasks(jobs)
-                    pipeline_ev = Pipeline(
-                        id=uuid.uuid4(),
-                        name=jobs[NAME],
-                        displayName=jobs[NAME],
-                        description="",
-                        tasks=tasks,
-                        service=EntityReference(
-                            id=self.pipeline_service.id, type="pipelineService"
-                        ),
-                    )
-                    yield pipeline_ev
+                runs = self.glue.get_job_runs(JobName=job)
+                runs = runs.get("JobRuns", [])
+                for attempt in runs:
+                    task_status = []
+                    task_status.append(
+                        TaskStatus(
+                            name=attempt["JobName"],
+                            executionStatus=STATUS_MAP.get(
+                                attempt["JobRunState"].lower(), StatusType.Pending
+                            ).value,
+                            startTime=attempt["StartedOn"].timestamp(),
+                            endTime=attempt["CompletedOn"].timestamp(),
+                        )
+                    )
+                    pipeline_status = PipelineStatus(
+                        taskStatus=task_status,
+                        executionDate=attempt["StartedOn"].timestamp(),
+                        executionStatus=STATUS_MAP.get(
+                            attempt["JobRunState"].lower(), StatusType.Pending
+                        ).value,
+                    )
+                    yield OMetaPipelineStatus(
+                        pipeline_fqn=self.context.pipeline.fullyQualifiedName.__root__,
+                        pipeline_status=pipeline_status,
+                    )
             except Exception as err:
                 logger.debug(traceback.format_exc())
                 logger.error(err)

-    def close(self):
-        pass
-
-    def get_status(self) -> SourceStatus:
-        return self.status
-
-    def test_connection(self) -> None:
-        test_connection(self.connection)
+    def yield_pipeline_lineage_details(
+        self, pipeline_details: Any
+    ) -> Optional[Iterable[AddLineageRequest]]:
+        """
+        Get lineage between pipeline and data sources
+        """
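The rewritten `get_downstream_tasks` (in the hunk above) also flips the mapping direction: the old code stored `Name -> UniqueId` and recovered names through a `list(...).index(...)` reverse lookup, while the new code stores `UniqueId -> Name` and reads names directly. A small runnable sketch over hypothetical Glue-shaped data:

    # Hypothetical workflow graph in the shape Glue's get_workflow returns
    graph = {
        "Nodes": [
            {"Name": "extract", "UniqueId": "n1", "Type": "JOB"},
            {"Name": "load", "UniqueId": "n2", "Type": "JOB"},
        ],
        "Edges": [{"SourceId": "n1", "DestinationId": "n2"}],
    }

    # New direction: UniqueId -> Name (truncated to 128 chars, as in the diff)
    task_id_mapping = {node["UniqueId"]: node["Name"][:128] for node in graph["Nodes"]}

    downstream = [
        task_id_mapping[edge["DestinationId"]]
        for edge in graph["Edges"]
        if edge["SourceId"] == "n1" and task_id_mapping.get(edge["DestinationId"])
    ]
    print(downstream)  # ['load']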
@@ -0,0 +1,227 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Base class for ingesting database services
+"""
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Iterable, List, Optional
+
+from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
+from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
+from metadata.generated.schema.entity.data.pipeline import Pipeline
+from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
+    OpenMetadataConnection,
+)
+from metadata.generated.schema.entity.services.pipelineService import (
+    PipelineConnection,
+    PipelineService,
+)
+from metadata.generated.schema.metadataIngestion.pipelineServiceMetadataPipeline import (
+    PipelineServiceMetadataPipeline,
+)
+from metadata.generated.schema.metadataIngestion.workflow import (
+    Source as WorkflowSource,
+)
+from metadata.ingestion.api.source import Source, SourceStatus
+from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
+from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
+from metadata.ingestion.models.topology import (
+    NodeStage,
+    ServiceTopology,
+    TopologyNode,
+    create_source_context,
+)
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.utils.connections import get_connection, test_connection
+from metadata.utils.filters import filter_by_dashboard
+from metadata.utils.logger import ingestion_logger
+
+logger = ingestion_logger()
+
+
+class PipelineServiceTopology(ServiceTopology):
+    """
+    Defines the hierarchy in Pipeline Services.
+
+    We could have a topology validator. We can only consume
+    data that has been produced by any parent node.
+    """
+
+    root = TopologyNode(
+        producer="get_services",
+        stages=[
+            NodeStage(
+                type_=PipelineService,
+                context="pipeline_service",
+                processor="yield_pipeline_service",
+            ),
+        ],
+        children=["pipeline"],
+    )
+    pipeline = TopologyNode(
+        producer="get_pipeline",
+        stages=[
+            NodeStage(
+                type_=Pipeline,
+                context="pipeline",
+                processor="yield_pipeline",
+                consumer=["pipeline_service"],
+            ),
+            NodeStage(
+                type_=OMetaPipelineStatus,
+                context="pipeline_status",
+                processor="yield_pipeline_status",
+                consumer=["pipeline_service"],
+                nullable=True,
+                ack_sink=False,
+            ),
+            NodeStage(
+                type_=AddLineageRequest,
+                context="lineage",
+                processor="yield_pipeline_lineage",
+                consumer=["pipeline_service"],
+                ack_sink=False,
+                nullable=True,
+            ),
+        ],
+    )
+
+
+@dataclass
+class PipelineSourceStatus(SourceStatus):
+    """
+    Reports the source status after ingestion
+    """
+
+    pipelines_scanned: List[str] = field(default_factory=list)
+    filtered: List[str] = field(default_factory=list)
+
+    def pipeline_scanned(self, topic: str) -> None:
+        self.pipelines_scanned.append(topic)
+
+    def dropped(self, topic: str) -> None:
+        self.filtered.append(topic)
+
+
+class PipelineServiceSource(TopologyRunnerMixin, Source, ABC):
+    """
+    Base class for Pipeline Services.
+    It implements the topology and context.
+    """
+
+    @abstractmethod
+    def yield_pipeline(self, pipeline_details: Any) -> Iterable[CreatePipelineRequest]:
+        """
+        Method to Get Pipeline Entity
+        """
+
+    @abstractmethod
+    def yield_pipeline_lineage_details(
+        self, pipeline_details: Any
+    ) -> Optional[Iterable[AddLineageRequest]]:
+        """
+        Get lineage between pipeline and data sources
+        """
+
+    @abstractmethod
+    def get_pipelines_list(self) -> Optional[List[Any]]:
+        """
+        Get List of all pipelines
+        """
+
+    @abstractmethod
+    def get_pipeline_name(self, pipeline_details: Any) -> str:
+        """
+        Get Pipeline Name
+        """
+
+    @abstractmethod
+    def yield_pipeline_status(
+        self, pipeline_details: Any
+    ) -> Optional[OMetaPipelineStatus]:
+        """
+        Get Pipeline Status
+        """
+
+    def yield_pipeline_lineage(
+        self, pipeline_details: Any
+    ) -> Iterable[AddLineageRequest]:
+        """
+        Yields lineage if config is enabled
+        """
+        if self.source_config.includeLineage:
+            yield from self.yield_pipeline_lineage_details(pipeline_details) or []
+
+    status: PipelineSourceStatus
+    source_config: PipelineServiceMetadataPipeline
+    config: WorkflowSource
+    metadata: OpenMetadata
+    # Big union of types we want to fetch dynamically
+    service_connection: PipelineConnection.__fields__["config"].type_
+
+    topology = PipelineServiceTopology()
+    context = create_source_context(topology)
+
+    @abstractmethod
+    def __init__(
+        self,
+        config: WorkflowSource,
+        metadata_config: OpenMetadataConnection,
+    ):
+        super().__init__()
+        self.config = config
+        self.metadata_config = metadata_config
+        self.metadata = OpenMetadata(metadata_config)
+        self.service_connection = self.config.serviceConnection.__root__.config
+        self.source_config: PipelineServiceMetadataPipeline = (
+            self.config.sourceConfig.config
+        )
+        self.connection = get_connection(self.service_connection)
+        self.test_connection()
+        self.status = PipelineSourceStatus()
+
+    def get_status(self) -> SourceStatus:
+        return self.status
+
+    def close(self):
+        """
+        Method to implement any required logic after the ingesion process is completed
+        """
+
+    def get_services(self) -> Iterable[WorkflowSource]:
+        yield self.config
+
+    def yield_pipeline_service(self, config: WorkflowSource):
+        yield self.metadata.get_create_service_from_source(
+            entity=PipelineService, config=config
+        )
+
+    def get_pipeline(self) -> Any:
+        for pipeline_detail in self.get_pipelines_list():
+            if filter_by_dashboard(
+                self.source_config.pipelineFilterPattern,
+                self.get_pipeline_name(pipeline_detail),
+            ):
+                self.status.filter(
+                    self.get_pipeline_name(pipeline_detail),
+                    "Pipeline Pattern not Allowed",
+                )
+                continue
+            yield pipeline_detail
+
+    def test_connection(self) -> None:
+        test_connection(self.connection)
+
+    def prepare(self):
+        """
+        Method to implement any required logic before starting the ingesion process
+        """
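To see how the pieces of this new module fit together, here is a minimal sketch (not part of this commit) of a concrete source built on `PipelineServiceSource`: a subclass only supplies the abstract hooks, while `TopologyRunnerMixin` walks the topology, applies `pipelineFilterPattern`, and calls each stage; `yield_pipeline_lineage` already guards on `includeLineage`. The class `DemoSource` and its return values are hypothetical:

    from typing import Any, Iterable, List, Optional

    from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
    from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
    from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
    from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource


    class DemoSource(PipelineServiceSource):
        # Construction omitted for brevity; real sources call
        # super().__init__(config, metadata_config) as Airbyte/Airflow/Glue do.

        def get_pipelines_list(self) -> Optional[List[Any]]:
            # Raw per-pipeline payloads; the base class filters and iterates them
            return [{"name": "daily_sync"}]

        def get_pipeline_name(self, pipeline_details: Any) -> str:
            # Used by get_pipeline() to apply pipelineFilterPattern
            return pipeline_details["name"]

        def yield_pipeline(
            self, pipeline_details: Any
        ) -> Iterable[CreatePipelineRequest]:
            # Would build a CreatePipelineRequest from pipeline_details
            yield from ()

        def yield_pipeline_status(
            self, pipeline_details: Any
        ) -> Optional[OMetaPipelineStatus]:
            return None  # nullable=True in the topology allows skipping this stage

        def yield_pipeline_lineage_details(
            self, pipeline_details: Any
        ) -> Optional[Iterable[AddLineageRequest]]:
            return None  # yield_pipeline_lineage() turns None into []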
ingestion/tests/unit/resources/datasets/airbyte_dataset.json (new file, 218 lines)
@@ -0,0 +1,218 @@
+{
+  "workspace": [
+    {
+      "workspaceId": "af5680ec-2687-4fe0-bd55-5ad5f020a603",
+      "customerId": "de633783-f0f2-4248-9ea2-092c4b5e1165",
+      "email": "mayur@getcollate.io",
+      "name": "af5680ec-2687-4fe0-bd55-5ad5f020a603",
+      "slug": "af5680ec-2687-4fe0-bd55-5ad5f020a603",
+      "initialSetupComplete": true,
+      "displaySetupWizard": false,
+      "anonymousDataCollection": false,
+      "news": false,
+      "securityUpdates": false,
+      "notifications": []
+    }
+  ],
+  "connection": [
+    {
+      "connectionId": "a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
+      "name": "MSSQL <> Postgres",
+      "namespaceDefinition": "source",
+      "namespaceFormat": "${SOURCE_NAMESPACE}",
+      "prefix": "",
+      "sourceId": "ed3783a9-8479-4be0-80aa-2075ccc6e310",
+      "destinationId": "4181764f-3ed8-44cb-9b8e-fefff7fb5ea9",
+      "operationIds": ["942463dd-f37c-4794-8931-88da6c166a04"],
+      "syncCatalog": {
+        "streams": [
+          {
+            "stream": {
+              "name": "brands",
+              "jsonSchema": {
+                "type": "object",
+                "properties": {
+                  "brand_id": {"type": "number"},
+                  "brand_name": {"type": "string"}
+                }
+              },
+              "supportedSyncModes": ["full_refresh", "incremental"],
+              "defaultCursorField": [],
+              "sourceDefinedPrimaryKey": [["brand_id"]],
+              "namespace": "production"
+            },
+            "config": {
+              "syncMode": "full_refresh",
+              "cursorField": [],
+              "destinationSyncMode": "overwrite",
+              "primaryKey": [["brand_id"]],
+              "aliasName": "brands",
+              "selected": true
+            }
+          },
+          {
+            "stream": {
+              "name": "categories",
+              "jsonSchema": {
+                "type": "object",
+                "properties": {
+                  "category_id": {"type": "number"},
+                  "category_name": {"type": "string"}
+                }
+              },
+              "supportedSyncModes": ["full_refresh", "incremental"],
+              "defaultCursorField": [],
+              "sourceDefinedPrimaryKey": [["category_id"]],
+              "namespace": "production"
+            },
+            "config": {
+              "syncMode": "full_refresh",
+              "cursorField": [],
+              "destinationSyncMode": "overwrite",
+              "primaryKey": [["category_id"]],
+              "aliasName": "categories",
+              "selected": true
+            }
+          },
+          {
+            "stream": {
+              "name": "products",
+              "jsonSchema": {
+                "type": "object",
+                "properties": {
+                  "brand_id": {"type": "number"},
+                  "list_price": {"type": "number"},
+                  "model_year": {"type": "number"},
+                  "product_id": {"type": "number"},
+                  "category_id": {"type": "number"},
+                  "product_name": {"type": "string"}
+                }
+              },
+              "supportedSyncModes": ["full_refresh", "incremental"],
+              "defaultCursorField": [],
+              "sourceDefinedPrimaryKey": [["product_id"]],
+              "namespace": "production"
+            },
+            "config": {
+              "syncMode": "full_refresh",
+              "cursorField": [],
+              "destinationSyncMode": "overwrite",
+              "primaryKey": [["product_id"]],
+              "aliasName": "products",
+              "selected": true
+            }
+          },
+          {
+            "stream": {
+              "name": "stocks",
+              "jsonSchema": {
+                "type": "object",
+                "properties": {
+                  "quantity": {"type": "number"},
+                  "store_id": {"type": "number"},
+                  "product_id": {"type": "number"}
+                }
+              },
+              "supportedSyncModes": ["full_refresh", "incremental"],
+              "defaultCursorField": [],
+              "sourceDefinedPrimaryKey": [["store_id"], ["product_id"]],
+              "namespace": "production"
+            },
+            "config": {
+              "syncMode": "full_refresh",
+              "cursorField": [],
+              "destinationSyncMode": "overwrite",
+              "primaryKey": [["store_id"], ["product_id"]],
+              "aliasName": "stocks",
+              "selected": true
+            }
+          }
+        ]
+      },
+      "schedule": {"units": 24, "timeUnit": "hours"},
+      "status": "inactive"
+    }
+  ],
+  "jobs": [
+    {
+      "job": {
+        "id": 14,
+        "configType": "sync",
+        "configId": "a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
+        "createdAt": 1655482894,
+        "updatedAt": 1655482894,
+        "status": "running"
+      },
+      "attempts": [
+        {
+          "id": 0,
+          "status": "running",
+          "createdAt": 1655482894,
+          "updatedAt": 1655482894,
+          "streamStats": []
+        }
+      ]
+    },
+    {
+      "job": {
+        "id": 13,
+        "configType": "sync",
+        "configId": "a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
+        "createdAt": 1655393914,
+        "updatedAt": 1655394054,
+        "status": "succeeded"
+      },
+      "attempts": [
+        {
+          "id": 0,
+          "status": "succeeded",
+          "createdAt": 1655393914,
+          "updatedAt": 1655394054,
+          "endedAt": 1655394054,
+          "bytesSynced": 85607,
+          "recordsSynced": 1276,
+          "totalStats": {
+            "recordsEmitted": 1276,
+            "bytesEmitted": 85607,
+            "stateMessagesEmitted": 0,
+            "recordsCommitted": 1276
+          },
+          "streamStats": [
+            {
+              "streamName": "brands",
+              "stats": {
+                "recordsEmitted": 9,
+                "bytesEmitted": 333,
+                "recordsCommitted": 9
+              }
+            },
+            {
+              "streamName": "categories",
+              "stats": {
+                "recordsEmitted": 7,
+                "bytesEmitted": 359,
+                "recordsCommitted": 7
+              }
+            },
+            {
+              "streamName": "stocks",
+              "stats": {
+                "recordsEmitted": 939,
+                "bytesEmitted": 41608,
+                "recordsCommitted": 939
+              }
+            },
+            {
+              "streamName": "products",
+              "stats": {
+                "recordsEmitted": 321,
+                "bytesEmitted": 43307,
+                "recordsCommitted": 321
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
177
ingestion/tests/unit/topology/pipeline/test_airbyte.py
Normal file
@ -0,0 +1,177 @@
import json
from unittest import TestCase
from unittest.mock import patch

from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
from metadata.generated.schema.entity.data.pipeline import (
    Pipeline,
    PipelineStatus,
    StatusType,
    Task,
    TaskStatus,
)
from metadata.generated.schema.entity.services.pipelineService import (
    PipelineConnection,
    PipelineService,
    PipelineServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    OpenMetadataWorkflowConfig,
)
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
from metadata.ingestion.source.pipeline.airbyte import (
    AirbytePipelineDetails,
    AirbyteSource,
)

# Load the Airbyte API fixture added above; it backs every mocked client call.
with open("ingestion/tests/unit/resources/datasets/airbyte_dataset.json") as mock_data_file:
    mock_data: dict = json.load(mock_data_file)
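

# A minimal workflow config for the test: an Airbyte source on a dummy
# localhost:1234 host plus a metadata-rest sink; just enough for
# OpenMetadataWorkflowConfig to parse.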
mock_airbyte_config = {
    "source": {
        "type": "airbyte",
        "serviceName": "airbyte_source",
        "serviceConnection": {
            "config": {"type": "Airbyte", "hostPort": "http://localhost:1234"}
        },
        "sourceConfig": {"config": {"type": "PipelineMetadata"}},
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "no-auth",
        }
    },
}


EXPECTED_AIRBYTE_DETAILS = AirbytePipelineDetails(
    workspace=mock_data["workspace"][0], connection=mock_data["connection"][0]
)
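

# Constants reused across the expectations: the connection's API path and the
# log link the source derives from the Airbyte host.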
MOCK_CONNECTION_URI_PATH = "/workspaces/af5680ec-2687-4fe0-bd55-5ad5f020a603/connections/a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f"
MOCK_LOG_URL = f"http://localhost:1234{MOCK_CONNECTION_URI_PATH}"


EXPECTED_PIPELINE_STATUS = [
    OMetaPipelineStatus(
        pipeline_fqn="airbyte_source.a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
        pipeline_status=PipelineStatus(
            executionStatus=StatusType.Pending.value,
            taskStatus=[
                TaskStatus(
                    name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
                    executionStatus=StatusType.Pending.value,
                    startTime=1655482894,
                    endTime=None,
                    logLink=f"{MOCK_LOG_URL}/status",
                )
            ],
            executionDate=1655482894,
        ),
    ),
    OMetaPipelineStatus(
        pipeline_fqn="airbyte_source.a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
        pipeline_status=PipelineStatus(
            executionStatus=StatusType.Successful.value,
            taskStatus=[
                TaskStatus(
                    name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
                    executionStatus=StatusType.Successful.value,
                    startTime=1655393914,
                    endTime=1655394054,
                    logLink=f"{MOCK_LOG_URL}/status",
                )
            ],
            executionDate=1655393914,
        ),
    ),
]
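

# The CreatePipelineRequest the source should emit for the mocked connection,
# pointing back at the mocked pipeline service below.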
EXPECTED_CREATED_PIPELINES = CreatePipelineRequest(
    name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
    displayName="MSSQL <> Postgres",
    description="",
    pipelineUrl=MOCK_CONNECTION_URI_PATH,
    tasks=[
        Task(
            name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
            displayName="MSSQL <> Postgres",
            description="",
            taskUrl=f"{MOCK_CONNECTION_URI_PATH}/status",
        )
    ],
    service=EntityReference(
        id="85811038-099a-11ed-861d-0242ac120002", type="pipelineService"
    ),
)
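

# Pre-built service and pipeline entities, injected straight into the source
# context so the test never needs a live OpenMetadata server.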
MOCK_PIPELINE_SERVICE = PipelineService(
    id="85811038-099a-11ed-861d-0242ac120002",
    name="airbyte_source",
    connection=PipelineConnection(),
    serviceType=PipelineServiceType.Airbyte,
)


MOCK_PIPELINE = Pipeline(
    id="2aaa012e-099a-11ed-861d-0242ac120002",
    name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
    fullyQualifiedName="airbyte_source.a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
    displayName="MSSQL <> Postgres",
    description="",
    pipelineUrl=MOCK_CONNECTION_URI_PATH,
    tasks=[
        Task(
            name="a10f6d82-4fc6-4c90-ba04-bb773c8fbb0f",
            displayName="MSSQL <> Postgres",
            description="",
            taskUrl=f"{MOCK_CONNECTION_URI_PATH}/status",
        )
    ],
    service=EntityReference(
        id="85811038-099a-11ed-861d-0242ac120002", type="pipelineService"
    ),
)
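

# AirbyteClient is patched so every API call is answered from the JSON
# fixture; each test then checks one step of the source topology against the
# expectations above.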
class AirbyteUnitTest(TestCase):
    def __init__(self, methodName: str = ...) -> None:
        super().__init__(methodName)
        self.init_workflow()

    @patch("metadata.ingestion.source.pipeline.pipeline_service.test_connection")
    @patch("metadata.ingestion.source.pipeline.airbyte.AirbyteClient")
    def init_workflow(self, airbyte_client, test_connection):
        test_connection.return_value = False
        config = OpenMetadataWorkflowConfig.parse_obj(mock_airbyte_config)
        self.airbyte = AirbyteSource.create(
            mock_airbyte_config["source"],
            config.workflowConfig.openMetadataServerConfig,
        )
        self.airbyte.context.__dict__["pipeline"] = MOCK_PIPELINE
        self.airbyte.context.__dict__["pipeline_service"] = MOCK_PIPELINE_SERVICE
        self.client = airbyte_client.return_value
        self.client.list_jobs.return_value = mock_data.get("jobs")
        self.client.list_workspaces.return_value = mock_data.get("workspace")
        self.client.list_connections.return_value = mock_data.get("connection")

    def test_pipeline_list(self):
        assert list(self.airbyte.get_pipelines_list())[0] == EXPECTED_AIRBYTE_DETAILS

    def test_pipeline_name(self):
        assert self.airbyte.get_pipeline_name(
            EXPECTED_AIRBYTE_DETAILS
        ) == mock_data.get("connection")[0].get("connectionId")

    def test_pipelines(self):
        pipeline = list(self.airbyte.yield_pipeline(EXPECTED_AIRBYTE_DETAILS))[0]
        assert pipeline == EXPECTED_CREATED_PIPELINES

    def test_pipeline_status(self):
        assert (
            list(self.airbyte.yield_pipeline_status(EXPECTED_AIRBYTE_DETAILS))
            == EXPECTED_PIPELINE_STATUS
        )
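
A quick way to exercise just this suite locally, as a sketch assuming pytest is available in the ingestion environment and the command is run from the repository root (the fixture is opened via a root-relative path):

pytest ingestion/tests/unit/topology/pipeline/test_airbyte.py -v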