mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-02 19:48:17 +00:00
MINOR: Add logic to handle WorkflowContext on Ingestion (#21425)
* Add logic to handle WorkflowContext on Ingestion * Revert base.py changes * Removed comment * Fix basedpyright complaints * Make ContextManager automatically add its context to the PipelineStatus * Small changes (cherry picked from commit 5b20b845462cfcb568b92dbf22e160226433fae5)
This commit is contained in:
parent
008a3ebc27
commit
41dfdff43e
24
ingestion/src/metadata/workflow/context/base.py
Normal file
24
ingestion/src/metadata/workflow/context/base.py
Normal file
@ -0,0 +1,24 @@
|
||||
"""
|
||||
Base context class for workflow contexts.
|
||||
|
||||
This module defines the BaseContext, which all workflow context types should inherit from.
|
||||
It uses Pydantic for data validation, serialization, and type safety.
|
||||
"""
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class BaseContextFieldsEnum(Enum):
|
||||
"""
|
||||
Base class for all workflow context fields.
|
||||
"""
|
||||
|
||||
|
||||
class BaseContext(BaseModel):
|
||||
"""
|
||||
Base class for all workflow contexts. Extend this for specific context types.
|
||||
"""
|
||||
|
||||
class Config:
|
||||
validate_assignment: bool = True
|
||||
122
ingestion/src/metadata/workflow/context/context_manager.py
Normal file
122
ingestion/src/metadata/workflow/context/context_manager.py
Normal file
@ -0,0 +1,122 @@
|
||||
"""
|
||||
Context manager for workflow contexts.
|
||||
|
||||
This module provides the ContextManager singleton, which dynamically registers and exposes
|
||||
workflow contexts as attributes. Contexts are registered using the register_context decorator.
|
||||
This approach combines extensibility with attribute-based access and IDE/static analysis support.
|
||||
"""
|
||||
|
||||
import threading
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
from metadata.workflow.context.base import BaseContext, BaseContextFieldsEnum
|
||||
from metadata.workflow.context.workflow_context import WorkflowContext
|
||||
|
||||
# NOTE: To make the context available on the context manager we need to:
|
||||
# 1. Add it to the ContextsEnum
|
||||
# 2. Declare it and initialize it as a class attribute in ContextManager
|
||||
|
||||
|
||||
class ContextsEnum(Enum):
|
||||
"""
|
||||
Enum defining all available workflow contexts.
|
||||
Each member represents a context type that can be registered with the ContextManager.
|
||||
"""
|
||||
|
||||
WORKFLOW = "workflow"
|
||||
|
||||
|
||||
class ContextManager:
|
||||
"""
|
||||
Singleton manager for all workflow contexts. Dynamically registers and exposes contexts as attributes.
|
||||
"""
|
||||
|
||||
_instance: Optional["ContextManager"] = None
|
||||
_lock: threading.RLock = threading.RLock()
|
||||
|
||||
# List of Contexts
|
||||
workflow: WorkflowContext = WorkflowContext()
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
with cls._lock:
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
return cls._instance
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""
|
||||
Return the singleton instance of ContextManager, creating it if necessary.
|
||||
"""
|
||||
if cls._instance is None:
|
||||
_ = cls()
|
||||
return cls._instance
|
||||
|
||||
@classmethod
|
||||
def set_context_attr(
|
||||
cls, context_enum: ContextsEnum, field_enum: BaseContextFieldsEnum, value: Any
|
||||
):
|
||||
"""
|
||||
Thread-safe method to set an attribute on a context.
|
||||
|
||||
Args:
|
||||
context_enum (Enum): Enum member representing the context (e.g., ContextManager.Contexts.workflow).
|
||||
field_enum (Enum): Enum member representing the field (e.g., WorkflowContext.Fields.serviceName).
|
||||
value (Any): The value to set for the field.
|
||||
"""
|
||||
with cls._lock:
|
||||
instance = cls.get_instance()
|
||||
context = getattr(instance, context_enum.value)
|
||||
setattr(context, field_enum.value, value)
|
||||
|
||||
@classmethod
|
||||
def get_context_attr(
|
||||
cls, context_enum: ContextsEnum, field_enum: BaseContextFieldsEnum
|
||||
) -> Any:
|
||||
"""
|
||||
Thread-safe method to get an attribute from a context.
|
||||
|
||||
Args:
|
||||
context_enum (Enum): Enum member representing the context (e.g., ContextManager.Contexts.workflow).
|
||||
field_enum (Enum): Enum member representing the field (e.g., WorkflowContext.Fields.serviceName).
|
||||
|
||||
Returns:
|
||||
Any: The value of the requested field.
|
||||
"""
|
||||
with cls._lock:
|
||||
instance = cls.get_instance()
|
||||
context = getattr(instance, context_enum.value)
|
||||
return getattr(context, field_enum.value)
|
||||
|
||||
@classmethod
|
||||
def get_context(cls, context_enum: ContextsEnum) -> BaseContext:
|
||||
"""
|
||||
Thread-safe method to retrieve the full context object by Enum member.
|
||||
|
||||
Args:
|
||||
context_enum (Enum): Enum member representing the context (e.g., ContextManager.Contexts.workflow).
|
||||
|
||||
Returns:
|
||||
Any: The context object instance.
|
||||
"""
|
||||
with cls._lock:
|
||||
instance = cls.get_instance()
|
||||
return getattr(instance, context_enum.value)
|
||||
|
||||
@classmethod
|
||||
def dump_contexts(cls) -> Optional[dict[str, Any]]:
|
||||
"""
|
||||
Dump all available contexts as a dictionary: {contextName: content}
|
||||
Assumes each context is a Pydantic object.
|
||||
"""
|
||||
with cls._lock:
|
||||
instance = cls.get_instance()
|
||||
result: dict[str, Any] = {}
|
||||
for context_enum in ContextsEnum:
|
||||
context_obj = getattr(instance, context_enum.value)
|
||||
result[context_enum.value] = context_obj.model_dump()
|
||||
if result:
|
||||
return result
|
||||
return None
|
||||
29
ingestion/src/metadata/workflow/context/workflow_context.py
Normal file
29
ingestion/src/metadata/workflow/context/workflow_context.py
Normal file
@ -0,0 +1,29 @@
|
||||
"""
|
||||
Workflow context definition.
|
||||
|
||||
This module defines the WorkflowContext, which holds workflow-level metadata such as the service name.
|
||||
It is registered with the ContextManager for attribute-based access throughout the workflow system.
|
||||
"""
|
||||
from typing import Union
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from .base import BaseContext, BaseContextFieldsEnum
|
||||
|
||||
|
||||
class WorkflowContextFieldsEnum(BaseContextFieldsEnum):
|
||||
"""
|
||||
Enum defining all available workflow context fields.
|
||||
"""
|
||||
|
||||
SERVICE_NAME = "serviceName"
|
||||
|
||||
|
||||
class WorkflowContext(BaseContext):
|
||||
"""
|
||||
Context for workflow-level metadata.
|
||||
"""
|
||||
|
||||
serviceName: Union[str, None] = Field(
|
||||
default=None, description="Name of the service on which the workflow operates"
|
||||
)
|
||||
@ -29,10 +29,11 @@ from metadata.generated.schema.entity.services.ingestionPipelines.status import
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
OpenMetadataWorkflowConfig,
|
||||
)
|
||||
from metadata.generated.schema.type.basic import Timestamp
|
||||
from metadata.generated.schema.type.basic import Map, Timestamp
|
||||
from metadata.ingestion.api.step import Step, Summary
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.utils.logger import ometa_logger
|
||||
from metadata.workflow.context.context_manager import ContextManager
|
||||
|
||||
logger = ometa_logger()
|
||||
|
||||
@ -80,12 +81,23 @@ class WorkflowStatusMixin:
|
||||
pipelineState=state,
|
||||
startDate=Timestamp(self._start_ts),
|
||||
timestamp=Timestamp(self._start_ts),
|
||||
)
|
||||
) # type: ignore
|
||||
|
||||
def update_pipeline_status_metadata(
|
||||
self, pipeline_status: PipelineStatus
|
||||
) -> PipelineStatus:
|
||||
"""
|
||||
Update the pipeline status metadata with the context manager data.
|
||||
"""
|
||||
metadata = ContextManager.dump_contexts()
|
||||
if metadata:
|
||||
pipeline_status.metadata = Map(**metadata)
|
||||
else:
|
||||
pipeline_status.metadata = None
|
||||
return pipeline_status
|
||||
|
||||
def set_ingestion_pipeline_status(
|
||||
self,
|
||||
state: PipelineState,
|
||||
ingestion_status: Optional[IngestionStatus] = None,
|
||||
self, state: PipelineState, ingestion_status: Optional[IngestionStatus] = None
|
||||
) -> None:
|
||||
"""
|
||||
Method to set the pipeline status of current ingestion pipeline
|
||||
@ -93,7 +105,11 @@ class WorkflowStatusMixin:
|
||||
|
||||
try:
|
||||
# if we don't have a related Ingestion Pipeline FQN, no status is set.
|
||||
if self.config.ingestionPipelineFQN and self.ingestion_pipeline:
|
||||
if (
|
||||
self.config.ingestionPipelineFQN
|
||||
and self.ingestion_pipeline
|
||||
and self.ingestion_pipeline.fullyQualifiedName
|
||||
):
|
||||
pipeline_status = self.metadata.get_pipeline_status(
|
||||
self.ingestion_pipeline.fullyQualifiedName.root, self.run_id
|
||||
)
|
||||
@ -112,10 +128,15 @@ class WorkflowStatusMixin:
|
||||
)
|
||||
# committing configurations can be a burden on resources,
|
||||
# we dump a subset to be mindful of the payload size
|
||||
pipeline_status.config = self.config.model_dump(
|
||||
include={"appConfig"},
|
||||
mask_secrets=True,
|
||||
pipeline_status.config = Map(
|
||||
**self.config.model_dump(
|
||||
include={"appConfig"},
|
||||
mask_secrets=True,
|
||||
)
|
||||
)
|
||||
|
||||
pipeline_status = self.update_pipeline_status_metadata(pipeline_status)
|
||||
|
||||
self.metadata.create_or_update_pipeline_status(
|
||||
self.ingestion_pipeline.fullyQualifiedName.root, pipeline_status
|
||||
)
|
||||
@ -129,13 +150,13 @@ class WorkflowStatusMixin:
|
||||
"""
|
||||
Method to raise error if failed execution
|
||||
"""
|
||||
self.raise_from_status_internal(raise_warnings)
|
||||
self.raise_from_status_internal(raise_warnings) # type: ignore
|
||||
|
||||
def result_status(self) -> WorkflowResultStatus:
|
||||
"""
|
||||
Returns 1 if source status is failed, 0 otherwise.
|
||||
"""
|
||||
if self.get_failures():
|
||||
if self.get_failures(): # type: ignore
|
||||
return WorkflowResultStatus.FAILURE
|
||||
return WorkflowResultStatus.SUCCESS
|
||||
|
||||
@ -148,6 +169,6 @@ class WorkflowStatusMixin:
|
||||
return IngestionStatus(
|
||||
[
|
||||
StepSummary.model_validate(Summary.from_step(step).model_dump())
|
||||
for step in self.workflow_steps()
|
||||
for step in self.workflow_steps() # type: ignore
|
||||
]
|
||||
)
|
||||
|
||||
60
ingestion/tests/unit/workflow/test_context_manager.py
Normal file
60
ingestion/tests/unit/workflow/test_context_manager.py
Normal file
@ -0,0 +1,60 @@
|
||||
from metadata.workflow.context.context_manager import ContextManager, ContextsEnum
|
||||
from metadata.workflow.context.workflow_context import (
|
||||
WorkflowContext,
|
||||
WorkflowContextFieldsEnum,
|
||||
)
|
||||
|
||||
|
||||
def test_singleton_behavior():
|
||||
cm1 = ContextManager.get_instance()
|
||||
cm2 = ContextManager.get_instance()
|
||||
assert cm1 is cm2, "ContextManager should be a singleton"
|
||||
|
||||
|
||||
def test_context_get_set_attr():
|
||||
service_name = "test_service"
|
||||
cm = ContextManager.get_instance()
|
||||
assert cm is not None
|
||||
# Set and get using enums
|
||||
ContextManager.set_context_attr(
|
||||
ContextsEnum.WORKFLOW, WorkflowContextFieldsEnum.SERVICE_NAME, service_name
|
||||
)
|
||||
value = ContextManager.get_context_attr(
|
||||
ContextsEnum.WORKFLOW, WorkflowContextFieldsEnum.SERVICE_NAME
|
||||
)
|
||||
assert value == service_name
|
||||
|
||||
|
||||
def test_get_context_returns_context():
|
||||
cm = ContextManager.get_instance()
|
||||
assert cm is not None
|
||||
workflow_ctx = ContextManager.get_context(ContextsEnum.WORKFLOW)
|
||||
assert isinstance(workflow_ctx, WorkflowContext)
|
||||
# Should be the same object as returned by get_context
|
||||
assert workflow_ctx is ContextManager.get_context(ContextsEnum.WORKFLOW)
|
||||
|
||||
|
||||
def test_thread_safety():
|
||||
import threading
|
||||
|
||||
cm = ContextManager.get_instance()
|
||||
assert cm is not None
|
||||
|
||||
def set_service_name(name):
|
||||
ContextManager.set_context_attr(
|
||||
ContextsEnum.WORKFLOW, WorkflowContextFieldsEnum.SERVICE_NAME, name
|
||||
)
|
||||
|
||||
threads = [
|
||||
threading.Thread(target=set_service_name, args=(f"service_{i}",))
|
||||
for i in range(10)
|
||||
]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
# The final value should be one of the set values
|
||||
final_value = ContextManager.get_context_attr(
|
||||
ContextsEnum.WORKFLOW, WorkflowContextFieldsEnum.SERVICE_NAME
|
||||
)
|
||||
assert final_value in {f"service_{i}" for i in range(10)}
|
||||
@ -47,6 +47,10 @@
|
||||
"config": {
|
||||
"description": "Pipeline configuration for this particular execution.",
|
||||
"$ref": "../../../type/basic.json#/definitions/map"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Metadata for the pipeline status.",
|
||||
"$ref": "../../../type/basic.json#/definitions/map"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
|
||||
@ -652,6 +652,10 @@ export interface PipelineStatus {
|
||||
* endDate of the pipeline run for this particular execution.
|
||||
*/
|
||||
endDate?: number;
|
||||
/**
|
||||
* Metadata for the pipeline status.
|
||||
*/
|
||||
metadata?: { [key: string]: any };
|
||||
/**
|
||||
* Pipeline status denotes if its failed or succeeded.
|
||||
*/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user