Simplify topology & update context management (#13196)
parent 047ab980cc, commit 442528267c
@@ -13,11 +13,15 @@ Mixin to be used by service sources to dynamically
generate the _run based on their topology.
"""
import traceback
from functools import singledispatchmethod
from typing import Any, Generic, Iterable, List, TypeVar

from pydantic import BaseModel

from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.classification.tag import Tag
from metadata.ingestion.api.models import Either, Entity
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.topology import (
    NodeStage,
    ServiceTopology,
@@ -134,7 +138,7 @@ class TopologyRunnerMixin(Generic[C]):
        """
        yield from self.process_nodes(get_topology_root(self.topology))

    def update_context(self, key: str, value: Any) -> None:
    def _replace_context(self, key: str, value: Any) -> None:
        """
        Update the key of the context with the given value
        :param key: element to update from the source context
@@ -142,7 +146,7 @@ class TopologyRunnerMixin(Generic[C]):
        """
        self.context.__dict__[key] = value

    def append_context(self, key: str, value: Any) -> None:
    def _append_context(self, key: str, value: Any) -> None:
        """
        Update the key of the context with the given value
        :param key: element to update from the source context
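To make the rename concrete: a minimal, self-contained sketch of the replace/append behaviour, assuming a plain Pydantic model as the source context. `SimpleContext` and `ContextHolder` are illustrative stand-ins, not the project's classes.

from typing import Any, List, Optional

from pydantic import BaseModel


class SimpleContext(BaseModel):
    """Illustrative stand-in for the topology source context."""

    database: Optional[str] = None
    tags: List[Any] = []


class ContextHolder:
    """Toy holder mirroring the two helpers renamed in this hunk."""

    def __init__(self) -> None:
        self.context = SimpleContext()

    def _replace_context(self, key: str, value: Any) -> None:
        # Overwrite the key: only the latest value survives
        self.context.__dict__[key] = value

    def _append_context(self, key: str, value: Any) -> None:
        # Accumulate values under the key, as cache_all stages need
        self.context.__dict__[key] = self.context.__dict__.get(key) or []
        self.context.__dict__[key].append(value)


holder = ContextHolder()
holder._replace_context(key="database", value="sales")
holder._append_context(key="tags", value="PII")
holder._append_context(key="tags", value="Tier1")
print(holder.context.database, holder.context.tags)  # sales ['PII', 'Tier1']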
@@ -172,37 +176,30 @@ class TopologyRunnerMixin(Generic[C]):
            *context_names, entity_request.name.__root__
        )

    def sink_request(
        self, stage: NodeStage, entity_request: Either[C]
    def update_context(self, stage: NodeStage, entity: Entity):
        """Append or update context"""
        if stage.context and not stage.cache_all:
            self._replace_context(key=stage.context, value=entity)
        if stage.context and stage.cache_all:
            self._append_context(key=stage.context, value=entity)

    @singledispatchmethod
    def yield_and_update_context(
        self,
        right: C,
        stage: NodeStage,
        entity_request: Either[C],
    ) -> Iterable[Either[Entity]]:
        """
        Validate that the entity was properly updated or retry if
        ack_sink is flagged.
        Handle the process of yielding the request and validating
        that everything was properly updated.

        If we get the Entity back, update the context with it.

        :param stage: Node stage being processed
        :param entity_request: Request to pass
        :return: Entity generator
        The default implementation is based on a get_by_name validation
        """

        # Either use the received request or the acknowledged Entity
        entity = entity_request.right

        if not stage.nullable and entity is None:
            raise ValueError("Value unexpectedly None")

        # Check that we properly received a Right response to process
        if entity_request.right is not None:

            # We need to acknowledge that the Entity has been properly sent to the server
            # to update the context
            if stage.ack_sink:
                entity = None

                entity_fqn = self.fqn_from_context(
                    stage=stage, entity_request=entity_request.right
                )
        entity_fqn = self.fqn_from_context(stage=stage, entity_request=right)

        # we get entity from OM if we do not want to overwrite existing data in OM
        if not stage.overwrite and not self._is_force_overwrite_enabled():
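The new `yield_and_update_context` relies on `functools.singledispatchmethod` (added to the imports above), so the runtime type of the first non-self argument picks the implementation; that is why `sink_request` passes the unwrapped `entity` positionally first. A self-contained sketch of that dispatch pattern follows; `LineageRequest`, `TagRequest` and `Runner` are toy stand-ins for `AddLineageRequest`, `OMetaTagAndClassification` and the mixin itself.

from dataclasses import dataclass
from functools import singledispatchmethod
from typing import Any, Iterable


@dataclass
class LineageRequest:  # toy stand-in for AddLineageRequest
    edge: str


@dataclass
class TagRequest:  # toy stand-in for OMetaTagAndClassification
    name: str


class Runner:
    @singledispatchmethod
    def yield_and_update_context(self, right: Any) -> Iterable[str]:
        # Default path: a get_by_name-style validation would go here
        yield f"default handling for {right!r}"

    @yield_and_update_context.register
    def _(self, right: LineageRequest) -> Iterable[str]:
        # Lineage: no cheap server-side check, so just yield and move on
        yield f"lineage handling for edge {right.edge}"

    @yield_and_update_context.register
    def _(self, right: TagRequest) -> Iterable[str]:
        # Tags: the real mixin retries until the server acknowledges the tag
        yield f"tag handling for {right.name}"


runner = Runner()
print(list(runner.yield_and_update_context(LineageRequest(edge="a->b"))))
print(list(runner.yield_and_update_context(TagRequest(name="PII"))))
print(list(runner.yield_and_update_context(42)))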
@@ -232,13 +229,94 @@ class TopologyRunnerMixin(Generic[C]):
                    "for the service connection."
                )

        else:
            self.update_context(stage=stage, entity=entity)

    @yield_and_update_context.register
    def _(
        self,
        right: AddLineageRequest,
        stage: NodeStage,
        entity_request: Either[C],
    ) -> Iterable[Either[Entity]]:
        """
        Lineage Implementation for the context information.

        There is no simple (efficient) validation to make sure that this specific
        lineage has been properly drawn. We'll skip the process for now.
        """
        yield entity_request
        self.update_context(stage=stage, entity=right)

    @yield_and_update_context.register
    def _(
        self,
        right: OMetaTagAndClassification,
        stage: NodeStage,
        entity_request: Either[C],
    ) -> Iterable[Either[Entity]]:
        """Tag implementation for the context information"""
        yield entity_request

        if stage.context and not stage.cache_all:
            self.update_context(key=stage.context, value=entity)
        if stage.context and stage.cache_all:
            self.append_context(key=stage.context, value=entity)
        tag = None

        tries = 3
        while not tag and tries > 0:
            yield entity_request
            tag = self.metadata.get_by_name(
                entity=Tag,
                fqn=fqn.build(
                    metadata=self.metadata,
                    entity_type=Tag,
                    classification_name=right.tag_request.classification.__root__,
                    tag_name=right.tag_request.name.__root__,
                ),
            )
            tries -= 1

        # We have ack the sink waiting for a response, but got nothing back
        if stage.must_return and tag is None:
            # Safe access to Entity Request name
            raise MissingExpectedEntityAckException(
                f"Missing ack back from [Tag: {right.tag_request.name}] - "
                "Possible causes are changes in the server Fernet key or mismatched JSON Schemas "
                "for the service connection."
            )

        # We want to keep the full payload in the context
        self.update_context(stage=stage, entity=right)

    def sink_request(
        self, stage: NodeStage, entity_request: Either[C]
    ) -> Iterable[Either[Entity]]:
        """
        Validate that the entity was properly updated or retry if
        ack_sink is flagged.

        If we get the Entity back, update the context with it.

        :param stage: Node stage being processed
        :param entity_request: Request to pass
        :return: Entity generator
        """

        # Either use the received request or the acknowledged Entity
        entity = entity_request.right

        if not stage.nullable and entity is None:
            raise ValueError("Value unexpectedly None")

        # Check that we properly received a Right response to process
        if entity_request.right is not None:

            # We need to acknowledge that the Entity has been properly sent to the server
            # to update the context
            if stage.context:
                yield from self.yield_and_update_context(
                    entity, stage=stage, entity_request=entity_request
                )

            else:
                yield entity_request

        else:
            # if entity_request.right is None, means that we have a Left. We yield the Either and
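The tag overload above waits for the server to acknowledge the classification, bounded at three tries. The same pattern in isolation, as a runnable sketch; `FakeMetadata`, `wait_for_ack` and `MissingAckError` are illustrative stand-ins, not the project's API.

from typing import Optional


class MissingAckError(RuntimeError):
    """Illustrative counterpart of MissingExpectedEntityAckException."""


class FakeMetadata:
    """Pretends the entity only becomes visible on the second lookup."""

    def __init__(self) -> None:
        self.calls = 0

    def get_by_name(self, fqn: str) -> Optional[str]:
        self.calls += 1
        return fqn if self.calls >= 2 else None


def wait_for_ack(metadata: FakeMetadata, fqn: str, tries: int = 3) -> str:
    entity = None
    while not entity and tries > 0:
        # In the mixin, this is where the request is yielded to the sink
        entity = metadata.get_by_name(fqn=fqn)
        tries -= 1
    if entity is None:
        raise MissingAckError(f"Missing ack back from [{fqn}]")
    return entity


print(wait_for_ack(FakeMetadata(), fqn="PII.Sensitive"))  # PII.Sensitive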
@@ -32,8 +32,9 @@ class NodeStage(BaseModel, Generic[T]):

    type_: Type[T]  # Entity type
    processor: str  # has the producer results as an argument. Here is where filters happen
    context: Optional[str] = None  # context key storing stage state, if needed
    ack_sink: bool = True  # Validate that the request is present in OM and update the context with the results
    context: Optional[
        str
    ] = None  # context key storing stage state, if needed. This requires us to ACK the ingestion
    nullable: bool = False  # The yielded value can be null
    must_return: bool = False  # The sink MUST return a value back after ack. Useful to validate services are correct.
    cache_all: bool = (
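To keep the remaining flags in view, here is a minimal dataclass mirroring the fields shown in this hunk; it is a sketch, not the real `NodeStage` from `metadata.ingestion.models.topology`, and the example values are illustrative.

from dataclasses import dataclass
from typing import Any, Optional, Type


@dataclass
class StageSketch:
    """Illustrative mirror of the NodeStage fields shown above."""

    type_: Type[Any]               # Entity type produced by the stage
    processor: str                 # source method that yields the requests
    context: Optional[str] = None  # context key storing stage state, if needed
    nullable: bool = False         # the yielded value can be null
    must_return: bool = False      # the sink MUST return a value back after ack
    cache_all: bool = False        # append to the context instead of replacing it


# With ack_sink gone, setting `context` alone is what asks the runner to
# validate the request against the server and store the result in the context.
tags_stage = StageSketch(
    type_=dict,                    # stand-in for OMetaTagAndClassification
    processor="yield_tag",
    context="tags",
    nullable=True,
    cache_all=True,
)
print(tags_stage)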
@@ -105,9 +105,7 @@ class DashboardServiceTopology(ServiceTopology):
            ),
            NodeStage(
                type_=OMetaTagAndClassification,
                context="tags",
                processor="yield_tag",
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -169,7 +167,6 @@ class DashboardServiceTopology(ServiceTopology):
                context="lineage",
                processor="yield_dashboard_lineage",
                consumer=["dashboard_service"],
                ack_sink=False,
                nullable=True,
            ),
            NodeStage(
@@ -177,7 +174,6 @@ class DashboardServiceTopology(ServiceTopology):
                context="usage",
                processor="yield_dashboard_usage",
                consumer=["dashboard_service"],
                ack_sink=False,
                nullable=True,
            ),
        ],
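For context on how these stage lists are consumed: the runner walks each topology node, calls its producer, feeds every produced item through the stage processors, and writes results into the context keys, replacing or appending depending on `cache_all`. The compressed sketch below uses toy `Stage`, `Node` and `MiniRunner` classes, not the project's `TopologyNode` or `TopologyRunnerMixin` API.

from dataclasses import dataclass
from typing import Callable, Dict, Iterable, List, Optional


@dataclass
class Stage:  # toy counterpart of NodeStage
    processor: str
    context: Optional[str] = None
    cache_all: bool = False


@dataclass
class Node:  # toy counterpart of a topology node
    producer: str
    stages: List[Stage]


class MiniRunner:
    """Walks a single node: producer first, then each stage processor."""

    def __init__(self) -> None:
        self.context: Dict[str, object] = {}

    def produce_charts(self) -> Iterable[str]:
        yield from ("chart_a", "chart_b")

    def yield_tag(self, item: str) -> str:
        return f"tag:{item}"

    def process_node(self, node: Node) -> Iterable[str]:
        producer: Callable = getattr(self, node.producer)
        for item in producer():
            for stage in node.stages:
                result = getattr(self, stage.processor)(item)
                if stage.context and stage.cache_all:
                    self.context.setdefault(stage.context, []).append(result)
                elif stage.context:
                    self.context[stage.context] = result
                yield result


runner = MiniRunner()
node = Node(
    producer="produce_charts",
    stages=[Stage(processor="yield_tag", context="tags", cache_all=True)],
)
print(list(runner.process_node(node)))  # ['tag:chart_a', 'tag:chart_b']
print(runner.context)                   # {'tags': ['tag:chart_a', 'tag:chart_b']}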
@@ -126,7 +126,6 @@ class DatabaseServiceTopology(ServiceTopology):
                type_=OMetaTagAndClassification,
                context="tags",
                processor="yield_database_schema_tag_details",
                ack_sink=False,
                nullable=True,
                cache_all=True,
            ),
@@ -147,7 +146,6 @@ class DatabaseServiceTopology(ServiceTopology):
                type_=OMetaTagAndClassification,
                context="tags",
                processor="yield_table_tag_details",
                ack_sink=False,
                nullable=True,
                cache_all=True,
            ),
@@ -159,9 +157,7 @@ class DatabaseServiceTopology(ServiceTopology):
            ),
            NodeStage(
                type_=OMetaLifeCycleData,
                context="life_cycle",
                processor="yield_life_cycle_data",
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -182,17 +178,15 @@ class DatabaseServiceTopology(ServiceTopology):
        producer="get_stored_procedure_queries",
        stages=[
            NodeStage(
                type_=AddLineageRequest,  # TODO: Fix context management for multiple types
                type_=AddLineageRequest,
                processor="yield_procedure_lineage",
                context="stored_procedure_query_lineage",  # Used to flag if the query has had processed lineage
                nullable=True,
                ack_sink=False,
            ),
            NodeStage(
                type_=Query,
                processor="yield_procedure_query",
                nullable=True,
                ack_sink=False,
            ),
        ],
    )
@@ -65,7 +65,6 @@ class DbtServiceTopology(ServiceTopology):
            NodeStage(
                type_=DbtFiles,
                processor="validate_dbt_files",
                ack_sink=False,
                nullable=True,
            )
        ],
@@ -82,14 +81,12 @@ class DbtServiceTopology(ServiceTopology):
                type_=OMetaTagAndClassification,
                context="tags",
                processor="yield_dbt_tags",
                ack_sink=False,
                nullable=True,
                cache_all=True,
            ),
            NodeStage(
                type_=DataModelLink,
                processor="yield_data_models",
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -100,17 +97,14 @@ class DbtServiceTopology(ServiceTopology):
            NodeStage(
                type_=AddLineageRequest,
                processor="create_dbt_lineage",
                ack_sink=False,
            ),
            NodeStage(
                type_=AddLineageRequest,
                processor="create_dbt_query_lineage",
                ack_sink=False,
            ),
            NodeStage(
                type_=DataModelLink,
                processor="process_dbt_descriptions",
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -121,17 +115,14 @@ class DbtServiceTopology(ServiceTopology):
            NodeStage(
                type_=CreateTestDefinitionRequest,
                processor="create_dbt_tests_definition",
                ack_sink=False,
            ),
            NodeStage(
                type_=CreateTestCaseRequest,
                processor="create_dbt_test_case",
                ack_sink=False,
            ),
            NodeStage(
                type_=TestCaseResult,
                processor="add_dbt_test_result",
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -133,12 +133,10 @@ class StoredProcedureMixin:
    ) -> Iterable[Either[AddLineageRequest]]:
        """Add procedure lineage from its query"""

        self.update_context(key="stored_procedure_query_lineage", value=False)
        if self.is_lineage_query(
            query_type=query_by_procedure.query_type,
            query_text=query_by_procedure.query_text,
        ):
            self.update_context(key="stored_procedure_query_lineage", value=True)

            for either_lineage in get_lineage_by_query(
                self.metadata,
@@ -95,11 +95,9 @@ class MessagingServiceTopology(ServiceTopology):
            ),
            NodeStage(
                type_=TopicSampleData,
                context="topic_sample_data",
                processor="yield_topic_sample_data",
                consumer=["messaging_service"],
                nullable=True,
                ack_sink=False,
            ),
        ],
    )
@@ -129,7 +129,7 @@ class DatabrickspipelineSource(PipelineServiceSource):

    def get_tasks(self, pipeline_details: dict) -> List[Task]:
        task_list = []
        self.append_context(key="job_id_list", value=pipeline_details["job_id"])
        self._append_context(key="job_id_list", value=pipeline_details["job_id"])

        downstream_tasks = self.get_downstream_tasks(
            pipeline_details["settings"].get("tasks")
@@ -81,7 +81,6 @@ class PipelineServiceTopology(ServiceTopology):
                type_=OMetaTagAndClassification,
                context="tags",
                processor="yield_tag",
                ack_sink=False,
                nullable=True,
            ),
            NodeStage(
@@ -92,18 +91,14 @@ class PipelineServiceTopology(ServiceTopology):
            ),
            NodeStage(
                type_=OMetaPipelineStatus,
                context="pipeline_status",
                processor="yield_pipeline_status",
                consumer=["pipeline_service"],
                nullable=True,
                ack_sink=False,
            ),
            NodeStage(
                type_=AddLineageRequest,
                context="lineage",
                processor="yield_pipeline_lineage",
                consumer=["pipeline_service"],
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -91,10 +91,8 @@ class SearchServiceTopology(ServiceTopology):
            ),
            NodeStage(
                type_=OMetaIndexSampleData,
                context="search_index_sample_data",
                processor="yield_search_index_sample_data",
                consumer=["search_service"],
                ack_sink=False,
                nullable=True,
            ),
        ],
@@ -30,9 +30,7 @@ class MockTopology(ServiceTopology):
        stages=[
            NodeStage(
                type_=int,
                context="numbers",
                processor="yield_numbers",
                ack_sink=False,
            )
        ],
        children=["strings"],
@@ -42,9 +40,7 @@ class MockTopology(ServiceTopology):
        stages=[
            NodeStage(
                type_=str,
                context="strings",
                processor="yield_strings",
                ack_sink=False,
                consumer=["numbers"],
            )
        ],
@@ -69,21 +65,23 @@ class MockSource(TopologyRunnerMixin):
    def yield_numbers(number: int):
        yield Either(right=number + 1)

    def yield_strings(self, my_str: str):
        yield Either(right=my_str + str(self.context.numbers))
    @staticmethod
    def yield_strings(my_str: str):
        yield Either(right=my_str)


class TopologyRunnerTest(TestCase):
    """Validate filter patterns"""

    def test_node_and_stage(self):
    @staticmethod
    def test_node_and_stage():
        source = MockSource()
        processed = list(source._iter())
        assert [either.right for either in processed] == [
            2,
            "abc2",
            "def2",
            "abc",
            "def",
            3,
            "abc3",
            "def3",
            "abc",
            "def",
        ]