mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-13 18:04:55 +00:00
feat(sdk): support patches as MCPs in file source (#8220)
Co-authored-by: Shirshanka Das <shirshanka@apache.org>
This commit is contained in:
parent
9254a1b614
commit
2d7692a245
@ -197,6 +197,11 @@ class MetadataChangeProposalWrapper:
|
|||||||
Raises:
|
Raises:
|
||||||
Exception if the generic aspect is invalid, e.g. contains invalid json.
|
Exception if the generic aspect is invalid, e.g. contains invalid json.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if mcpc.changeType != ChangeTypeClass.UPSERT:
|
||||||
|
# We can only generate MCPWs for upserts.
|
||||||
|
return None
|
||||||
|
|
||||||
converted, aspect = _try_from_generic_aspect(mcpc.aspectName, mcpc.aspect)
|
converted, aspect = _try_from_generic_aspect(mcpc.aspectName, mcpc.aspect)
|
||||||
if converted:
|
if converted:
|
||||||
return cls(
|
return cls(
|
||||||
|
@ -0,0 +1,42 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "datasetProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"replace\", \"path\": \"/description\", \"value\": \"test description\"}, {\"op\": \"add\", \"path\": \"/customProperties/test_key_1\", \"value\": \"test_value_1\"}, {\"op\": \"add\", \"path\": \"/customProperties/test_key_2\", \"value\": \"test_value_2\"}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/tags/urn:li:tag:test_tag\", \"value\": {\"tag\": \"urn:li:tag:test_tag\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "upstreamLineage",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cfct_users_created_upstream%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)\", \"type\": \"TRANSFORMED\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "editableSchemaMetadata",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/editableSchemaFieldInfo/field1/globalTags/tags/urn:li:tag:tag1\", \"value\": {\"tag\": \"urn:li:tag:tag1\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
66
metadata-ingestion/tests/unit/patch/test_patch_builder.py
Normal file
66
metadata-ingestion/tests/unit/patch/test_patch_builder.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn
|
||||||
|
from datahub.ingestion.sink.file import write_metadata_file
|
||||||
|
from datahub.metadata.schema_classes import (
|
||||||
|
DatasetLineageTypeClass,
|
||||||
|
GenericAspectClass,
|
||||||
|
MetadataChangeProposalClass,
|
||||||
|
TagAssociationClass,
|
||||||
|
UpstreamClass,
|
||||||
|
)
|
||||||
|
from datahub.specific.dataset import DatasetPatchBuilder
|
||||||
|
|
||||||
|
|
||||||
|
def test_basic_dataset_patch_builder() -> None:
    """Check that adding a single tag yields exactly one ``globalTags`` PATCH proposal.

    A ``DatasetPatchBuilder`` is created for the hive dataset
    ``fct_users_created`` (PROD) and one tag association is added to it.
    ``build()`` is then expected to return a one-element list holding a
    ``MetadataChangeProposalClass`` whose aspect is a ``GenericAspectClass``
    carrying the JSON Patch operations as bytes.
    """
    patcher = DatasetPatchBuilder(
        make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
    ).add_tag(TagAssociationClass(tag=make_tag_urn("test_tag")))

    # The expected payload is compared byte-for-byte, so key order and spacing
    # inside the serialized patch are part of what this test pins down.
    assert patcher.build() == [
        MetadataChangeProposalClass(
            entityType="dataset",
            entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
            changeType="PATCH",
            aspectName="globalTags",
            aspect=GenericAspectClass(
                value=b'[{"op": "add", "path": "/tags/urn:li:tag:test_tag", "value": {"tag": "urn:li:tag:test_tag"}}]',
                contentType="application/json-patch+json",
            ),
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_complex_dataset_patch(
    pytestconfig: pytest.Config, tmp_path: pathlib.Path
) -> None:
    """Build a multi-aspect dataset patch, write it to a file, and compare it to the checked-in JSON.

    The patch sets a description, two custom properties, a dataset-level tag,
    one upstream lineage edge, and a tag on the schema field ``field1``.
    The built proposals are written with ``write_metadata_file`` and the
    parsed output must equal the parsed contents of
    ``tests/unit/patch/complex_dataset_patch.json`` under the pytest root path.
    """
    builder = DatasetPatchBuilder(
        make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
    )
    builder = builder.set_description("test description")
    builder = builder.add_custom_property("test_key_1", "test_value_1")
    builder = builder.add_custom_property("test_key_2", "test_value_2")
    builder = builder.add_tag(TagAssociationClass(tag=make_tag_urn("test_tag")))

    upstream_edge = UpstreamClass(
        dataset=make_dataset_urn(
            platform="hive", name="fct_users_created_upstream", env="PROD"
        ),
        type=DatasetLineageTypeClass.TRANSFORMED,
    )
    builder = builder.add_upstream_lineage(upstream=upstream_edge)

    # The return value is intentionally discarded here.
    # NOTE(review): presumably the field-level helper records its patch on the
    # parent builder - confirm against DatasetPatchBuilder.for_field.
    field_patcher = builder.for_field("field1")
    field_patcher.add_tag(TagAssociationClass(tag=make_tag_urn("tag1")))

    written_path = tmp_path / "patch.json"
    write_metadata_file(written_path, builder.build())

    expected_path = (
        pytestconfig.rootpath / "tests/unit/patch/complex_dataset_patch.json"
    )

    # Compare parsed JSON rather than raw text so file formatting does not matter.
    actual_payload = json.loads(written_path.read_text())
    expected_payload = json.loads(expected_path.read_text())
    assert actual_payload == expected_payload
|
@ -36,15 +36,16 @@ FROZEN_TIME = "2021-07-22 18:54:06"
|
|||||||
"tests/unit/serde/test_serde_usage.json",
|
"tests/unit/serde/test_serde_usage.json",
|
||||||
# Profiles with the MetadataChangeProposal format.
|
# Profiles with the MetadataChangeProposal format.
|
||||||
"tests/unit/serde/test_serde_profile.json",
|
"tests/unit/serde/test_serde_profile.json",
|
||||||
|
# Test one that uses patch.
|
||||||
|
"tests/unit/serde/test_serde_patch.json",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_serde_to_json(
|
def test_serde_to_json(
|
||||||
pytestconfig: PytestConfig, tmp_path: pathlib.Path, json_filename: str
|
pytestconfig: PytestConfig, tmp_path: pathlib.Path, json_filename: str
|
||||||
) -> None:
|
) -> None:
|
||||||
golden_file = pytestconfig.rootpath / json_filename
|
golden_file = pytestconfig.rootpath / json_filename
|
||||||
|
output_file = tmp_path / "output.json"
|
||||||
|
|
||||||
output_filename = "output.json"
|
|
||||||
output_file = tmp_path / output_filename
|
|
||||||
pipeline = Pipeline.create(
|
pipeline = Pipeline.create(
|
||||||
{
|
{
|
||||||
"source": {"type": "file", "config": {"filename": str(golden_file)}},
|
"source": {"type": "file", "config": {"filename": str(golden_file)}},
|
||||||
@ -57,7 +58,7 @@ def test_serde_to_json(
|
|||||||
|
|
||||||
mce_helpers.check_golden_file(
|
mce_helpers.check_golden_file(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
output_path=f"{tmp_path}/{output_filename}",
|
output_path=f"{output_file}",
|
||||||
golden_path=golden_file,
|
golden_path=golden_file,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
58
metadata-ingestion/tests/unit/serde/test_serde_patch.json
Normal file
58
metadata-ingestion/tests/unit/serde/test_serde_patch.json
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "datasetProperties",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"replace\", \"path\": \"/description\", \"value\": \"test description\"}, {\"op\": \"add\", \"path\": \"/customProperties/test_key_1\", \"value\": \"test_value_1\"}, {\"op\": \"add\", \"path\": \"/customProperties/test_key_2\", \"value\": \"test_value_2\"}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1626980046000,
|
||||||
|
"runId": "serde_test"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/tags/urn:li:tag:test_tag\", \"value\": {\"tag\": \"urn:li:tag:test_tag\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1626980046000,
|
||||||
|
"runId": "serde_test"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "upstreamLineage",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/upstreams/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2Cfct_users_created_upstream%2CPROD%29\", \"value\": {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created_upstream,PROD)\", \"type\": \"TRANSFORMED\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1626980046000,
|
||||||
|
"runId": "serde_test"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataset",
|
||||||
|
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
|
||||||
|
"changeType": "PATCH",
|
||||||
|
"aspectName": "editableSchemaMetadata",
|
||||||
|
"aspect": {
|
||||||
|
"value": "[{\"op\": \"add\", \"path\": \"/editableSchemaFieldInfo/field1/globalTags/tags/urn:li:tag:tag1\", \"value\": {\"tag\": \"urn:li:tag:tag1\"}}]",
|
||||||
|
"contentType": "application/json-patch+json"
|
||||||
|
},
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1626980046000,
|
||||||
|
"runId": "serde_test"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
Loading…
x
Reference in New Issue
Block a user