feat(actions): require pydantic v2 (#13958)

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Harshal Sheth, 2025-07-04 16:44:50 -04:00, committed by GitHub
parent 308c6bd28e
commit 5143894be9
18 changed files with 38 additions and 36 deletions

View File

@@ -51,7 +51,7 @@ base_requirements = {
     "mypy_extensions>=0.4.3",
     # Actual dependencies.
     "typing-inspect",
-    "pydantic>=1.10.21",
+    "pydantic>=2.0.0,<3.0.0",
     "ratelimit",
     # Lower bounds on httpcore and h11 due to CVE-2025-43859.
     "httpcore>=1.0.9",
@@ -167,9 +167,6 @@ full_test_dev_requirements = {
         ]
         for dependency in plugins[plugin]
     ),
-    # In our tests, we want to always test against pydantic v2.
-    # However, we maintain compatibility with pydantic v1 for now.
-    "pydantic>2",
 }
 
 entry_points = {
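
Note: with the base requirement now pinned to pydantic>=2.0.0,<3.0.0, the test-only pin "pydantic>2" is redundant, hence its removal. During the earlier dual-support window, code typically branched on the installed major version at runtime; a minimal sketch (the PYDANTIC_V2 flag name is illustrative, not taken from this repo):

    from pydantic import VERSION as PYDANTIC_VERSION

    # True when pydantic v2.x is installed; useful while a codebase
    # still supports both major versions.
    PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")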

View File

@@ -112,7 +112,7 @@ class Pipeline:
     @classmethod
     def create(cls, config_dict: dict) -> "Pipeline":
         # Bind config
-        config = PipelineConfig.parse_obj(config_dict)
+        config = PipelineConfig.model_validate(config_dict)
         if not config.enabled:
             raise Exception(
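
Context for the recurring change below: parse_obj is the pydantic v1 spelling and is deprecated in v2 in favor of model_validate. A minimal sketch using a simplified stand-in for the real PipelineConfig:

    from pydantic import BaseModel

    class PipelineConfig(BaseModel):  # simplified stand-in, not the real config
        name: str
        enabled: bool = True

    # v1: PipelineConfig.parse_obj({"name": "my-pipeline"})
    config = PipelineConfig.model_validate({"name": "my-pipeline"})
    assert config.enabled is True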

View File

@@ -76,7 +76,7 @@ class ExecutorConfig(BaseModel):
 class ExecutorAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        config = ExecutorConfig.parse_obj(config_dict or {})
+        config = ExecutorConfig.model_validate(config_dict or {})
         return cls(config, ctx)
 
     def __init__(self, config: ExecutorConfig, ctx: PipelineContext):
@@ -206,7 +206,7 @@ class ExecutorAction(Action):
                 # TODO: Once SecretStoreConfig is updated to accept arbitrary types
                 # and not just dicts, we can just pass in the DataHubSecretStoreConfig
                 # object directly.
-                config=DataHubSecretStoreConfig(graph_client=graph).dict(),
+                config=DataHubSecretStoreConfig(graph_client=graph).model_dump(),
             ),
         ],
         graph_client=graph,
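
The serialization side follows the same pattern: .dict() is deprecated in v2 and model_dump() is the replacement, accepting the same filtering keywords (exclude_none, exclude_unset, and so on). A sketch with a hypothetical model (not the real DataHubSecretStoreConfig, which holds a graph client):

    from typing import Optional
    from pydantic import BaseModel

    class SecretStoreConfig(BaseModel):  # hypothetical stand-in
        url: str
        token: Optional[str] = None

    cfg = SecretStoreConfig(url="http://localhost:8080")
    cfg.model_dump()                   # {'url': 'http://localhost:8080', 'token': None}
    cfg.model_dump(exclude_none=True)  # {'url': 'http://localhost:8080'}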

View File

@@ -34,7 +34,7 @@ class HelloWorldConfig(BaseModel):
 class HelloWorldAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = HelloWorldConfig.parse_obj(config_dict or {})
+        action_config = HelloWorldConfig.model_validate(config_dict or {})
         return cls(action_config, ctx)
 
     def __init__(self, config: HelloWorldConfig, ctx: PipelineContext):

View File

@@ -48,7 +48,7 @@ class MetadataChangeSyncAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = MetadataChangeEmitterConfig.parse_obj(config_dict or {})
+        action_config = MetadataChangeEmitterConfig.model_validate(config_dict or {})
         return cls(action_config, ctx)
 
     def __init__(self, config: MetadataChangeEmitterConfig, ctx: PipelineContext):

View File

@@ -161,7 +161,7 @@ class DocPropagationAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = DocPropagationConfig.parse_obj(config_dict or {})
+        action_config = DocPropagationConfig.model_validate(config_dict or {})
         logger.info(f"Doc Propagation Config action configured with {action_config}")
         return cls(action_config, ctx)
@@ -442,7 +442,7 @@ class DocPropagationAction(Action):
         # otherwise, we add a new documentation entry sourced by this action
         for doc_association in documentations.documentations[:]:
             if doc_association.attribution and doc_association.attribution.source:
-                source_details_parsed: SourceDetails = SourceDetails.parse_obj(
+                source_details_parsed: SourceDetails = SourceDetails.model_validate(
                     doc_association.attribution.sourceDetail
                 )
                 if doc_association.attribution.source == self.action_urn and (
@@ -680,7 +680,7 @@ class DocPropagationAction(Action):
             f"Downstreams: {downstreams} for {doc_propagation_directive.entity}"
         )
         entity_urn = doc_propagation_directive.entity
-        propagated_context = SourceDetails.parse_obj(context.dict())
+        propagated_context = SourceDetails.model_validate(context.model_dump())
         propagated_context.propagation_relationship = RelationshipType.LINEAGE
         propagated_context.propagation_direction = DirectionType.DOWN
         propagated_entities_this_hop_count = 0
@@ -753,7 +753,7 @@ class DocPropagationAction(Action):
         )
         logger.debug(f"Upstreams: {upstreams} for {doc_propagation_directive.entity}")
         entity_urn = doc_propagation_directive.entity
-        propagated_context = SourceDetails.parse_obj(context.dict())
+        propagated_context = SourceDetails.model_validate(context.model_dump())
         propagated_context.propagation_relationship = RelationshipType.LINEAGE
         propagated_context.propagation_direction = DirectionType.UP
         propagated_entities_this_hop_count = 0
@@ -819,7 +819,7 @@ class DocPropagationAction(Action):
         assert self.ctx.graph
         entity_urn = doc_propagation_directive.entity
         siblings = get_unique_siblings(self.ctx.graph, entity_urn)
-        propagated_context = SourceDetails.parse_obj(context.dict())
+        propagated_context = SourceDetails.model_validate(context.model_dump())
         propagated_context.propagation_relationship = RelationshipType.SIBLING
         propagated_context.propagation_direction = DirectionType.ALL
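
The SourceDetails.model_validate(context.model_dump()) round trip (v1: parse_obj(context.dict())) produces an independent, re-validated copy that can be mutated per propagation hop without touching the caller's object. A sketch with a trimmed stand-in model; v2's model_copy(update=...) is an alternative when re-validation is not needed:

    from typing import Optional
    from pydantic import BaseModel

    class SourceDetails(BaseModel):  # trimmed stand-in for the real model
        propagation_direction: Optional[str] = None
        propagation_depth: int = 0

    context = SourceDetails(propagation_depth=1)

    # Dump to a plain dict, then validate back into a fresh instance.
    propagated = SourceDetails.model_validate(context.model_dump())
    propagated.propagation_direction = "DOWN"

    assert context.propagation_direction is None  # original is untouched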

View File

@@ -19,9 +19,7 @@
 from enum import Enum
 from functools import wraps
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
-from pydantic import validator
-from pydantic.fields import Field
-from pydantic.main import BaseModel
+from pydantic import BaseModel, Field, field_validator
 from ratelimit import limits, sleep_and_retry
 
 import datahub.metadata.schema_classes as models
@@ -111,13 +109,15 @@ class SourceDetails(BaseModel):
         description="The direction that the metadata was propagated through.",
     )
 
-    @validator("propagated", pre=True)
+    @field_validator("propagated", mode="before")
+    @classmethod
     def convert_boolean_to_lowercase_string(cls, v: Any) -> Optional[str]:
         if isinstance(v, bool):
             return str(v).lower()
         return v
 
-    @validator("propagation_depth", "propagation_started_at", pre=True)
+    @field_validator("propagation_depth", "propagation_started_at", mode="before")
+    @classmethod
     def convert_to_int(cls, v: Any) -> Optional[int]:
         if v is not None:
             return int(v)
@@ -129,7 +129,7 @@ class SourceDetails(BaseModel):
         Metadata Attribution MCPs.
         """
         result = {}
-        for k, v in self.dict(exclude_none=True).items():
+        for k, v in self.model_dump(exclude_none=True).items():
             if isinstance(v, Enum):
                 result[k] = v.value  # Use the enum's value
             elif isinstance(v, int):
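
The validator migration above: @validator(..., pre=True) becomes @field_validator(..., mode="before"), and pydantic v2's documentation stacks an explicit @classmethod under the decorator. A runnable sketch of the before-mode coercion pattern, on an illustrative model:

    from typing import Any, Optional
    from pydantic import BaseModel, field_validator

    class Details(BaseModel):  # illustrative model, not the real SourceDetails
        propagated: Optional[str] = None

        @field_validator("propagated", mode="before")
        @classmethod
        def convert_boolean_to_lowercase_string(cls, v: Any) -> Optional[str]:
            # Runs before type coercion, so a raw bool can be normalized.
            if isinstance(v, bool):
                return str(v).lower()
            return v

    assert Details(propagated=True).propagated == "true"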

View File

@@ -92,7 +92,7 @@ class SlackNotificationAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = SlackNotificationConfig.parse_obj(config_dict or {})
+        action_config = SlackNotificationConfig.model_validate(config_dict or {})
         logger.info(f"Slack notification action configured with {action_config}")
         return cls(action_config, ctx)

View File

@@ -65,7 +65,7 @@ class SnowflakeTagPropagatorAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        config = SnowflakeTagPropagatorConfig.parse_obj(config_dict or {})
+        config = SnowflakeTagPropagatorConfig.model_validate(config_dict or {})
         return cls(config, ctx)
 
     @staticmethod

View File

@@ -111,7 +111,7 @@ class EventProcessingStats(BaseModel):
         ).isoformat()
 
     def __str__(self) -> str:
-        return json.dumps(self.dict(), indent=2)
+        return json.dumps(self.model_dump(), indent=2)
 
 
 class StageStatus(StrEnum):
@@ -165,7 +165,7 @@ class ActionStageReport(BaseModel):
         return Report.to_pure_python_obj(self)
 
     def aggregatable_stats(self) -> Dict[str, int]:
-        all_items = self.dict()
+        all_items = self.model_dump()
         stats = {k: v for k, v in all_items.items() if k.startswith("total_")}
@@ -175,7 +175,7 @@ class ActionStageReport(BaseModel):
         # Add a few additional special cases of aggregatable stats.
         if self.event_processing_stats:
-            for key, value in self.event_processing_stats.dict().items():
+            for key, value in self.event_processing_stats.model_dump().items():
                 if value is not None:
                     stats[f"event_processing_stats.{key}"] = str(value)

View File

@@ -15,7 +15,7 @@
 import logging
 from typing import List, Optional
 
-from pydantic import BaseModel, Field, validator
+from pydantic import BaseModel, Field, field_validator
 
 from datahub.configuration.common import ConfigModel
 from datahub.emitter.mce_builder import make_tag_urn
@@ -61,10 +61,11 @@ class TagPropagationConfig(ConfigModel):
         ],
     )
 
-    @validator("tag_prefixes", each_item=True)
-    def tag_prefix_should_start_with_urn(cls, v: str) -> str:
+    @field_validator("tag_prefixes")
+    @classmethod
+    def tag_prefix_should_start_with_urn(cls, v: List[str]) -> List[str]:
         if v:
-            return make_tag_urn(v)
+            return [make_tag_urn(item) for item in v]
         return v
@@ -82,7 +83,7 @@ class TagPropagationAction(Action):
     @classmethod
     def create(cls, config_dict, ctx):
-        config = TagPropagationConfig.parse_obj(config_dict or {})
+        config = TagPropagationConfig.model_validate(config_dict or {})
         logger.info(f"TagPropagationAction configured with {config}")
         return cls(config, ctx)
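
Beyond the decorator rename, v2 drops @validator's each_item=True: a field_validator on a List field now receives the entire list, so the per-item transform moves into a comprehension, as in the diff above. A self-contained sketch (this make_tag_urn is a stand-in for datahub.emitter.mce_builder.make_tag_urn):

    from typing import List
    from pydantic import BaseModel, field_validator

    def make_tag_urn(tag: str) -> str:  # stand-in helper
        return tag if tag.startswith("urn:li:tag:") else f"urn:li:tag:{tag}"

    class TagPropagationConfig(BaseModel):  # trimmed stand-in
        tag_prefixes: List[str] = []

        # v1: @validator("tag_prefixes", each_item=True) saw one item at a time.
        @field_validator("tag_prefixes")
        @classmethod
        def tag_prefix_should_start_with_urn(cls, v: List[str]) -> List[str]:
            return [make_tag_urn(item) for item in v]

    assert TagPropagationConfig(tag_prefixes=["pii"]).tag_prefixes == ["urn:li:tag:pii"]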

View File

@@ -56,7 +56,7 @@ class TeamsNotificationAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = TeamsNotificationConfig.parse_obj(config_dict or {})
+        action_config = TeamsNotificationConfig.model_validate(config_dict or {})
         logger.info(f"Teams notification action configured with {action_config}")
         return cls(action_config, ctx)

View File

@@ -121,7 +121,7 @@ class TermPropagationAction(Action):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Action":
-        action_config = TermPropagationConfig.parse_obj(config_dict or {})
+        action_config = TermPropagationConfig.model_validate(config_dict or {})
         logger.info(f"Term Propagation Config action configured with {action_config}")
         return cls(action_config, ctx)

View File

@@ -88,7 +88,7 @@ class DataHubEventSource(EventSource):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "EventSource":
-        config = DataHubEventsSourceConfig.parse_obj(config_dict)
+        config = DataHubEventsSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def events(self) -> Iterable[EventEnvelope]:

View File

@@ -115,7 +115,9 @@ class DataHubEventsConsumer:
         response = requests.get(endpoint, params=params, headers=headers)
         response.raise_for_status()
 
-        external_events_response = ExternalEventsResponse.parse_obj(response.json())
+        external_events_response = ExternalEventsResponse.model_validate(
+            response.json()
+        )
 
         # Update our internal offset_id to the newly returned offset
         self.offset_id = external_events_response.offsetId
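
model_validate also replaces parse_obj for API payloads: response.json() already yields a dict, and validation runs on that parsed object. (v2's model_validate_json(response.text) could skip the intermediate dict, though the diff keeps the dict-based form.) A sketch with a trimmed response model:

    import requests
    from pydantic import BaseModel

    class ExternalEventsResponse(BaseModel):  # trimmed stand-in
        offsetId: str
        count: int = 0

    def poll(endpoint: str) -> ExternalEventsResponse:
        response = requests.get(endpoint, timeout=30)
        response.raise_for_status()
        # v1: ExternalEventsResponse.parse_obj(response.json())
        return ExternalEventsResponse.model_validate(response.json())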

View File

@@ -157,7 +157,7 @@ class KafkaEventSource(EventSource):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "EventSource":
-        config = KafkaEventSourceConfig.parse_obj(config_dict)
+        config = KafkaEventSourceConfig.model_validate(config_dict)
         return cls(config, ctx)
 
     def events(self) -> Iterable[EventEnvelope]:

View File

@@ -35,7 +35,7 @@ class FilterTransformer(Transformer):
     @classmethod
     def create(cls, config_dict: dict, ctx: PipelineContext) -> "Transformer":
-        config = FilterTransformerConfig.parse_obj(config_dict)
+        config = FilterTransformerConfig.model_validate(config_dict)
         return cls(config)
 
     def transform(self, env_event: EventEnvelope) -> Optional[EventEnvelope]:

View File

@@ -40,6 +40,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 ### Other Notable Changes
 
+- The `acryl-datahub-actions` package now requires Pydantic V2; it was previously compatible with both Pydantic V1 and V2.
+
 ## 1.1.0
 
 ### Breaking Changes