feat(actions): support pydantic v2 (#13378)
Parent: d25d318233
Commit: 591b6ce0c9

Changed file: .github/workflows/python-build-pages.yml (vendored), 4 changed lines
@@ -6,16 +6,20 @@ on:
     paths:
      - ".github/workflows/python-build-pages.yml"
      - "metadata-ingestion/**"
+     - "datahub-actions/**"
      - "metadata-ingestion-modules/**"
      - "metadata-models/**"
+     - "python-build/**"
   pull_request:
     branches:
      - "**"
     paths:
      - ".github/workflows/python-build-pages.yml"
      - "metadata-ingestion/**"
+     - "datahub-actions/**"
      - "metadata-ingestion-modules/**"
      - "metadata-models/**"
+     - "python-build/**"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -101,33 +101,13 @@ task installDevTest(type: Exec, dependsOn: [installDev]) {
     "touch ${sentinel_file}"
 }
 
-task testQuick(type: Exec, dependsOn: installDevTest) {
-    // We can't enforce the coverage requirements if we run a subset of the tests.
+task testFull(type: Exec, dependsOn: installDevTest) {
     inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
     inputs.files(project.fileTree(dir: "tests/"))
     outputs.dir("${venv_name}")
     commandLine 'bash', '-c',
         "source ${venv_name}/bin/activate && set -x && " +
-        "pytest -vv ${get_coverage_args('quick')} --continue-on-collection-errors --junit-xml=junit.quick.xml"
+        "pytest -vv ${get_coverage_args('full')} --continue-on-collection-errors --junit-xml=junit.full.xml"
 }
 
-def testFile = hasProperty('testFile') ? testFile : 'unknown'
-task testSingle(dependsOn: [installDevTest]) {
-    doLast {
-        if (testFile != 'unknown') {
-            exec {
-                commandLine 'bash', '-x', '-c',
-                    "source ${venv_name}/bin/activate && pytest ${testFile}"
-            }
-        } else {
-            throw new GradleException("No file provided. Use -PtestFile=<test_file>")
-        }
-    }
-}
-
-task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
-    commandLine 'bash', '-x', '-c',
-        "source ${venv_name}/bin/activate && pytest -vv ${get_coverage_args('full')} --continue-on-collection-errors --junit-xml=junit.full.xml"
-}
-
 task buildWheel(type: Exec, dependsOn: [environmentSetup]) {
@@ -172,7 +152,7 @@ docker {
 
 build.dependsOn install
 check.dependsOn lint
-check.dependsOn testQuick
+check.dependsOn testFull
 
 clean {
     delete venv_name
@@ -21,6 +21,13 @@ package_metadata: dict = {}
 with open("./src/datahub_actions/_version.py") as fp:
     exec(fp.read(), package_metadata)
 
+_version: str = package_metadata["__version__"]
+_self_pin = (
+    f"=={_version}"
+    if not (_version.endswith(("dev0", "dev1")) or "docker" in _version)
+    else ""
+)
+
 
 def get_long_description():
     root = os.path.dirname(__file__)
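
The _self_pin block above pins the acryl-datahub dependency to the exact datahub-actions version for release builds, while dev and docker builds stay unpinned. A standalone sketch of what that expression evaluates to (the version strings below are illustrative, not from the diff):

# Sketch of the _self_pin logic, with illustrative version strings.
def self_pin(version: str) -> str:
    if version.endswith(("dev0", "dev1")) or "docker" in version:
        return ""  # dev and docker builds leave acryl-datahub unpinned
    return f"=={version}"

assert self_pin("1.0.0") == "==1.0.0"   # release build: exact pin
assert self_pin("1.1.0.1.dev0") == ""   # dev build: no pin
assert self_pin("1.0.0+docker") == ""   # docker build: no pin
print(f"acryl-datahub[datahub-kafka]{self_pin('1.0.0')}")
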
@@ -30,8 +37,6 @@ def get_long_description():
     return description
 
 
-acryl_datahub_min_version = os.environ.get("ACRYL_DATAHUB_MIN_VERSION") or "1.0.0"
-
 
 lint_requirements = {
     # This is pinned only to avoid spurious errors in CI.
     # We should make an effort to keep it up to date.
@@ -40,18 +45,17 @@ lint_requirements = {
 }
 
 base_requirements = {
-    *lint_requirements,
-    f"acryl-datahub[datahub-kafka]>={acryl_datahub_min_version}",
+    f"acryl-datahub[datahub-kafka]{_self_pin}",
     # Compatibility.
     "typing_extensions>=3.7.4; python_version < '3.8'",
     "mypy_extensions>=0.4.3",
     # Actual dependencies.
     "typing-inspect",
-    "pydantic<2",
-    "dictdiffer",
+    "pydantic>=1.10.21",
     "ratelimit",
+    # Lower bounds on httpcore and h11 due to CVE-2025-43859.
     "httpcore>=1.0.9",
-    "h11>=0.16"
+    "h11>=0.16",
 }
 
 framework_common = {
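
Relaxing the pin from "pydantic<2" to "pydantic>=1.10.21" means either pydantic major may be installed at runtime, so shared code has to branch on the installed version. A minimal sketch of such a version-gated shim (the helper here is illustrative, not part of the diff):

# Version-gated compatibility shim, assuming pydantic v1 or v2 is installed.
from pydantic import BaseModel
from pydantic.version import VERSION as PYDANTIC_VERSION

PYDANTIC_V2 = PYDANTIC_VERSION.startswith("2.")

class Example(BaseModel):
    name: str = "demo"

def to_dict(model: BaseModel) -> dict:
    # v2 renames .dict() to .model_dump(); .dict() still works but warns.
    return model.model_dump() if PYDANTIC_V2 else model.dict()

print(to_dict(Example()))
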
@@ -67,14 +71,6 @@ framework_common = {
     "tenacity",
 }
 
-aws_common = {
-    # AWS Python SDK
-    "boto3",
-    # Deal with a version incompatibility between botocore (used by boto3) and urllib3.
-    # See https://github.com/boto/botocore/pull/2563.
-    "botocore!=1.23.0",
-}
-
 # Note: for all of these, framework_common will be added.
 plugins: Dict[str, Set[str]] = {
     # Source Plugins
@@ -94,7 +90,7 @@ plugins: Dict[str, Set[str]] = {
     "tag_propagation": set(),
     "term_propagation": set(),
     "snowflake_tag_propagation": {
-        f"acryl-datahub[snowflake]>={acryl_datahub_min_version}"
+        f"acryl-datahub[snowflake-slim]{_self_pin}",
     },
     "doc_propagation": set(),
     # Transformer Plugins (None yet)
@@ -115,10 +111,10 @@ mypy_stubs = {
     "types-cachetools",
     # versions 0.1.13 and 0.1.14 seem to have issues
     "types-click==0.1.12",
-    "boto3-stubs[s3,glue,sagemaker]",
 }
 
 base_dev_requirements = {
+    *lint_requirements,
     *base_requirements,
     *framework_common,
     *mypy_stubs,
@@ -169,6 +165,9 @@ full_test_dev_requirements = {
         ]
         for dependency in plugins[plugin]
     ),
+    # In our tests, we want to always test against pydantic v2.
+    # However, we maintain compatibility with pydantic v1 for now.
+    "pydantic>2",
 }
 
 entry_points = {
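
Per the comment above, the dev requirements force pydantic v2 in CI while the library itself still accepts v1. A sketch of a guard that makes the expectation explicit in the test environment (the test itself is illustrative, not from the diff):

# Illustrative guard matching the "pydantic>2" dev pin above.
import pydantic

def test_pydantic_major_version() -> None:
    major = int(pydantic.VERSION.split(".")[0])
    assert major >= 2, "dev requirements pin pydantic>2 for tests"
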
@@ -12,16 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 
 from pydantic import BaseModel
 
 from datahub.configuration import ConfigModel
+from datahub.configuration.common import ConfigEnum
 from datahub.ingestion.graph.client import DatahubClientConfig
 
 
-class FailureMode(str, Enum):
+class FailureMode(ConfigEnum):
     # Log the failed event to the failed events log. Then throw an pipeline exception to stop the pipeline.
     THROW = "THROW"
     # Log the failed event to the failed events log. Then continue processing the event stream.
@@ -30,17 +30,17 @@ class FailureMode(str, Enum):
 
 class SourceConfig(ConfigModel):
     type: str
-    config: Optional[Dict[str, Any]]
+    config: Optional[Dict[str, Any]] = None
 
 
 class TransformConfig(ConfigModel):
     type: str
-    config: Optional[Dict[str, Any]]
+    config: Optional[Dict[str, Any]] = None
 
 
 class FilterConfig(ConfigModel):
     event_type: Union[str, List[str]]
-    event: Optional[Dict[str, Any]]
+    event: Optional[Dict[str, Any]] = None
 
 
 class ActionConfig(ConfigModel):
@@ -49,12 +49,11 @@ class ActionConfig(ConfigModel):
 
 
 class PipelineOptions(BaseModel):
-    retry_count: Optional[int]
-    failure_mode: Optional[FailureMode]
-    failed_events_dir: Optional[str]  # The path where failed events should be logged.
-
-    class Config:
-        use_enum_values = True
+    retry_count: Optional[int] = None
+    failure_mode: Optional[FailureMode] = None
+    failed_events_dir: Optional[str] = (
+        None  # The path where failed events should be logged.
+    )
 
 
 class PipelineConfig(ConfigModel):
@@ -68,8 +67,8 @@ class PipelineConfig(ConfigModel):
     name: str
     enabled: bool = True
     source: SourceConfig
-    filter: Optional[FilterConfig]
-    transform: Optional[List[TransformConfig]]
+    filter: Optional[FilterConfig] = None
+    transform: Optional[List[TransformConfig]] = None
     action: ActionConfig
-    datahub: Optional[DatahubClientConfig]
-    options: Optional[PipelineOptions]
+    datahub: Optional[DatahubClientConfig] = None
+    options: Optional[PipelineOptions] = None
@@ -68,8 +68,8 @@ def import_path(path: str) -> Any:
 
 
 class ExecutorConfig(BaseModel):
-    executor_id: Optional[str]
-    task_configs: Optional[List[TaskConfig]]
+    executor_id: Optional[str] = None
+    task_configs: Optional[List[TaskConfig]] = None
 
 
 # Listens to new Execution Requests & dispatches them to the appropriate handler.
@@ -203,7 +203,10 @@ class ExecutorAction(Action):
             SecretStoreConfig(type="env", config=dict({})),
             SecretStoreConfig(
                 type="datahub",
-                config=DataHubSecretStoreConfig(graph_client=graph),
+                # TODO: Once SecretStoreConfig is updated to accept arbitrary types
+                # and not just dicts, we can just pass in the DataHubSecretStoreConfig
+                # object directly.
+                config=DataHubSecretStoreConfig(graph_client=graph).dict(),
             ),
         ],
         graph_client=graph,
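
Serializing the nested model with .dict() makes the value valid for a dict-typed config field under both pydantic majors, since v2 no longer coerces a BaseModel instance into a Dict field. A minimal sketch of the pattern (InnerConfig/OuterConfig are illustrative, not from the diff):

# Pass nested models as plain dicts so a Dict-typed field validates on v1 and v2.
from typing import Any, Dict
from pydantic import BaseModel

class InnerConfig(BaseModel):
    url: str = "http://localhost:8080"

class OuterConfig(BaseModel):
    type: str
    config: Dict[str, Any]

inner = InnerConfig()
# .dict() works on both majors (deprecated on v2, where model_dump() is preferred).
outer = OuterConfig(type="datahub", config=inner.dict())
print(outer)
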
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
 
 
 class HelloWorldConfig(BaseModel):
     # Whether to print the message in upper case.
-    to_upper: Optional[bool]
+    to_upper: Optional[bool] = None
 
 
 # A basic example of a DataHub action that prints all
@@ -19,13 +19,13 @@ logger = logging.getLogger(__name__)
 
 
 class MetadataChangeEmitterConfig(BaseModel):
-    gms_server: Optional[str]
-    gms_auth_token: Optional[str]
-    aspects_to_exclude: Optional[List]
-    aspects_to_include: Optional[List]
+    gms_server: Optional[str] = None
+    gms_auth_token: Optional[str] = None
+    aspects_to_exclude: Optional[List] = None
+    aspects_to_include: Optional[List] = None
     entity_type_to_exclude: List[str] = Field(default_factory=list)
-    extra_headers: Optional[Dict[str, str]]
-    urn_regex: Optional[str]
+    extra_headers: Optional[Dict[str, str]] = None
+    urn_regex: Optional[str] = None
 
 
 class MetadataChangeSyncAction(Action):
@@ -15,11 +15,11 @@
 import json
 import logging
 import time
-from enum import Enum
 from typing import Iterable, List, Optional, Tuple
 
 from pydantic import Field
 
+from datahub.configuration.common import ConfigEnum
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.metadata.schema_classes import (
     AuditStampClass,
@@ -60,7 +60,7 @@ class DocPropagationDirective(PropagationDirective):
     )
 
 
-class ColumnPropagationRelationships(str, Enum):
+class ColumnPropagationRelationships(ConfigEnum):
     UPSTREAM = "upstream"
     DOWNSTREAM = "downstream"
     SIBLING = "sibling"
@@ -82,18 +82,15 @@ class DocPropagationConfig(PropagationConfig):
     enabled: bool = Field(
         True,
         description="Indicates whether documentation propagation is enabled or not.",
-        example=True,
     )
     columns_enabled: bool = Field(
         True,
         description="Indicates whether column documentation propagation is enabled or not.",
-        example=True,
     )
     # TODO: Currently this flag does nothing. Datasets are NOT supported for docs propagation.
     datasets_enabled: bool = Field(
         False,
         description="Indicates whether dataset level documentation propagation is enabled or not.",
-        example=False,
     )
     column_propagation_relationships: List[ColumnPropagationRelationships] = Field(
         [
@@ -102,11 +99,6 @@ class DocPropagationConfig(PropagationConfig):
             ColumnPropagationRelationships.UPSTREAM,
         ],
         description="Relationships for column documentation propagation.",
-        example=[
-            ColumnPropagationRelationships.UPSTREAM,
-            ColumnPropagationRelationships.SIBLING,
-            ColumnPropagationRelationships.DOWNSTREAM,
-        ],
     )
 
 
@@ -15,13 +15,13 @@
 import abc
 import json
 from datetime import datetime, timezone
-from enum import Enum
 from typing import Dict, Optional
 
 import pydantic
 from pydantic import BaseModel
 
 from datahub.ingestion.api.report import Report, SupportsAsObj
+from datahub.utilities.str_enum import StrEnum
 from datahub_actions.action.action import Action
 from datahub_actions.event.event_envelope import EventEnvelope
 from datahub_actions.event.event_registry import (
@@ -114,7 +114,7 @@ class EventProcessingStats(BaseModel):
         return json.dumps(self.dict(), indent=2)
 
 
-class StageStatus(str, Enum):
+class StageStatus(StrEnum):
     SUCCESS = "success"
     FAILURE = "failure"
     RUNNING = "running"
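
StageStatus moves from (str, Enum) to datahub's StrEnum utility. Assuming that utility behaves like Python 3.11's enum.StrEnum, the point is that str() and f-string formatting of a member yield the raw value on every Python version, keeping serialized stats stable. A minimal stand-in (not the datahub.utilities.str_enum implementation):

# Why StrEnum: str() of a member yields the value, not "StageStatus.SUCCESS".
from enum import Enum

class StrEnum(str, Enum):
    def __str__(self) -> str:
        return self.value

class StageStatus(StrEnum):
    SUCCESS = "success"

print(f"{StageStatus.SUCCESS}")  # -> "success"
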
@@ -52,12 +52,13 @@ class TagPropagationConfig(ConfigModel):
     enabled: bool = Field(
         True,
         description="Indicates whether tag propagation is enabled or not.",
-        example=True,
     )
     tag_prefixes: Optional[List[str]] = Field(
         None,
         description="Optional list of tag prefixes to restrict tag propagation.",
-        example=["urn:li:tag:classification"],
+        examples=[
+            "urn:li:tag:classification",
+        ],
     )
 
     @validator("tag_prefixes", each_item=True)
@@ -60,17 +60,21 @@ class TermPropagationConfig(ConfigModel):
     enabled: bool = Field(
         True,
         description="Indicates whether term propagation is enabled or not.",
-        example=True,
     )
     target_terms: Optional[List[str]] = Field(
         None,
         description="Optional target terms to restrict term propagation to this and all terms related to these terms.",
-        example="[urn:li:glossaryTerm:Sensitive]",
+        examples=[
+            "urn:li:glossaryTerm:Sensitive",
+        ],
     )
     term_groups: Optional[List[str]] = Field(
         None,
         description="Optional list of term groups to restrict term propagation.",
-        example=["Group1", "Group2"],
+        examples=[
+            "Group1",
+            "Group2",
+        ],
     )
 
 
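
These hunks replace the v1-era example= keyword, which pydantic v1 accepted as an arbitrary extra kwarg on Field, with the examples parameter, which takes a list and is the supported spelling on v2 (v2 warns on unknown Field kwargs). A minimal sketch of the new form (TagConfig is illustrative; the examples parameter requires pydantic v2):

# Field(..., examples=[...]) replacing the v1-era example= kwarg.
from typing import List, Optional
from pydantic import BaseModel, Field

class TagConfig(BaseModel):
    tag_prefixes: Optional[List[str]] = Field(
        None,
        description="Optional list of tag prefixes to restrict tag propagation.",
        examples=["urn:li:tag:classification"],
    )

# The examples land in the generated JSON schema:
print(TagConfig.model_json_schema()["properties"]["tag_prefixes"])
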
@@ -44,7 +44,7 @@ def build_entity_change_event(payload: GenericPayloadClass) -> EntityChangeEvent:
 
 class DataHubEventsSourceConfig(ConfigModel):
     topic: str = PLATFORM_EVENT_TOPIC_NAME
-    consumer_id: Optional[str]  # Used to store offset for the consumer.
+    consumer_id: Optional[str] = None  # Used to store offset for the consumer.
     lookback_days: Optional[int] = None
     reset_offsets: Optional[bool] = False
 
@@ -18,7 +18,9 @@ logger = logging.getLogger(__name__)
 
 
 class EventConsumerState(BaseModel):
-    VERSION = 1  # Increment this version when the schema of EventConsumerState changes
+    VERSION: int = (
+        1  # Increment this version when the schema of EventConsumerState changes
+    )
     offset_id: Optional[str] = None
     timestamp: Optional[int] = None
 
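
The VERSION annotation is needed because pydantic v2 rejects bare, non-annotated class attributes on a BaseModel. A standalone sketch of the rule (illustrative, not from the diff):

# Why VERSION gains an annotation: v2 raises on non-annotated attributes.
from typing import Optional
from pydantic import BaseModel

class EventConsumerState(BaseModel):
    VERSION: int = 1  # annotated, so valid under both v1 and v2
    offset_id: Optional[str] = None

# Under v2, a bare `VERSION = 1` would raise:
# PydanticUserError: A non-annotated attribute was detected ...
print(EventConsumerState().VERSION)
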
@@ -12,67 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
-
-from datahub.configuration.common import ConfigurationError
-from datahub.ingestion.api.registry import PluginRegistry
-from datahub_actions.action.action import Action
 from datahub_actions.action.action_registry import action_registry
-from datahub_actions.plugin.action.hello_world.hello_world import HelloWorldAction
 
 
 def test_registry_nonempty():
     assert len(action_registry.mapping) > 0
 
 
-def test_registry():
-    fake_registry = PluginRegistry[Action]()
-    fake_registry.register("hello_world", HelloWorldAction)
-
-    assert len(fake_registry.mapping) > 0
-    assert fake_registry.is_enabled("hello_world")
-    assert fake_registry.get("hello_world") == HelloWorldAction
-    assert (
-        fake_registry.get(
-            "datahub_actions.plugin.action.hello_world.hello_world.HelloWorldAction"
-        )
-        == HelloWorldAction
-    )
-
-    # Test lazy-loading capabilities.
-    fake_registry.register_lazy(
-        "lazy-hello-world",
-        "datahub_actions.plugin.action.hello_world.hello_world:HelloWorldAction",
-    )
-    assert fake_registry.get("lazy-hello-world") == HelloWorldAction
-
-    # Test Registry Errors
-    fake_registry.register_lazy("lazy-error", "thisdoesnot.exist")
-    with pytest.raises(ConfigurationError, match="disabled"):
-        fake_registry.get("lazy-error")
-    with pytest.raises(KeyError, match="special characters"):
-        fake_registry.register("thisdoesnotexist.otherthing", HelloWorldAction)
-    with pytest.raises(KeyError, match="in use"):
-        fake_registry.register("hello_world", HelloWorldAction)
-    with pytest.raises(KeyError, match="not find"):
-        fake_registry.get("thisdoesnotexist")
-
-    # Test error-checking on registered types.
-    with pytest.raises(ValueError, match="abstract"):
-        fake_registry.register("thisdoesnotexist", Action)  # type: ignore
-
-    class DummyClass:  # Does not extend Action.
-        pass
-
-    with pytest.raises(ValueError, match="derived"):
-        fake_registry.register("thisdoesnotexist", DummyClass)  # type: ignore
-
-    # Test disabled actions
-    fake_registry.register_disabled("disabled", ModuleNotFoundError("disabled action"))
-    fake_registry.register_disabled(
-        "disabled-exception", Exception("second disabled action")
-    )
-    with pytest.raises(ConfigurationError, match="disabled"):
-        fake_registry.get("disabled")
-    with pytest.raises(ConfigurationError, match="disabled"):
-        fake_registry.get("disabled-exception")
+def test_all_registry_plugins_enabled() -> None:
+    for plugin in action_registry.mapping.keys():
+        assert action_registry.is_enabled(plugin), f"Plugin {plugin} is not enabled"
@@ -131,11 +131,14 @@ cachetools_lib = {
     "cachetools",
 }
 
-sql_common = (
-    {
+sql_common_slim = {
     # Required for all SQL sources.
     # This is temporary lower bound that we're open to loosening/tightening as requirements show up
     "sqlalchemy>=1.4.39, <2",
+}
+sql_common = (
+    {
+        *sql_common_slim,
         # Required for SQL profiling.
         "great-expectations>=0.15.12, <=0.15.50",
         *pydantic_no_v2,  # because of great-expectations
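
The slim split exists because great-expectations (pulled in by sql_common for profiling) carries the pydantic_no_v2 pin, so a consumer that needs pydantic v2 can depend on the slim variant instead. A toy sketch of the set algebra, with requirement sets trimmed to the relevant entries (values illustrative):

# How the slim extras avoid the pydantic<2 pin.
sql_common_slim = {"sqlalchemy>=1.4.39, <2"}
pydantic_no_v2 = {"pydantic<2"}
sql_common = (
    sql_common_slim
    | {"great-expectations>=0.15.12, <=0.15.50"}
    | pydantic_no_v2
)

assert "pydantic<2" not in sql_common_slim  # slim stays v2-compatible
assert "pydantic<2" in sql_common           # full profiling set does not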
@@ -220,8 +223,6 @@ redshift_common = {
 }
 
 snowflake_common = {
-    # Snowflake plugin utilizes sql common
-    *sql_common,
     # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350
     "snowflake-sqlalchemy>=1.4.3",
     "snowflake-connector-python>=3.4.0",
@@ -229,7 +230,7 @@ snowflake_common = {
     "cryptography",
     "msal",
     *cachetools_lib,
-} | classification_lib
+}
 
 trino = {
     "trino[sqlalchemy]>=0.308",
@@ -400,6 +401,7 @@ plugins: Dict[str, Set[str]] = {
     | {
         "google-cloud-datacatalog-lineage==0.2.2",
     },
+    "bigquery-slim": bigquery_common,
     "bigquery-queries": sql_common | bigquery_common | sqlglot_lib,
     "clickhouse": sql_common | clickhouse_common,
     "clickhouse-usage": sql_common | usage_common | clickhouse_common,
@@ -502,9 +504,10 @@ plugins: Dict[str, Set[str]] = {
     "abs": {*abs_base, *data_lake_profiling},
     "sagemaker": aws_common,
     "salesforce": {"simple-salesforce", *cachetools_lib},
-    "snowflake": snowflake_common | usage_common | sqlglot_lib,
-    "snowflake-summary": snowflake_common | usage_common | sqlglot_lib,
-    "snowflake-queries": snowflake_common | usage_common | sqlglot_lib,
+    "snowflake": snowflake_common | sql_common | usage_common | sqlglot_lib,
+    "snowflake-slim": snowflake_common,
+    "snowflake-summary": snowflake_common | sql_common | usage_common | sqlglot_lib,
+    "snowflake-queries": snowflake_common | sql_common | usage_common | sqlglot_lib,
     "sqlalchemy": sql_common,
     "sql-queries": usage_common | sqlglot_lib,
     "slack": slack,
@@ -935,6 +938,8 @@ See the [DataHub docs](https://docs.datahub.com/docs/metadata-ingestion).
         "sql-parser",
         "iceberg",
         "feast",
+        "bigquery-slim",
+        "snowflake-slim",
     }
     else set()
 )