2021-04-07 21:30:21 -07:00
|
|
|
import io
|
|
|
|
import json
|
2021-04-14 13:40:24 -07:00
|
|
|
import pathlib
|
2021-04-07 21:30:21 -07:00
|
|
|
|
|
|
|
import fastavro
|
2021-03-18 02:05:05 -04:00
|
|
|
import pytest
|
2021-07-29 20:04:40 -07:00
|
|
|
from freezegun import freeze_time
|
2021-02-11 18:31:15 -08:00
|
|
|
|
2021-04-30 21:10:12 -07:00
|
|
|
import datahub.metadata.schema_classes as models
|
2021-07-28 14:50:21 -07:00
|
|
|
from datahub.cli.json_file import check_mce_file
|
2021-02-15 15:04:21 -08:00
|
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
2021-06-24 17:11:00 -07:00
|
|
|
from datahub.ingestion.source.file import iterate_mce_file
|
2021-06-17 10:04:28 -07:00
|
|
|
from datahub.metadata.schema_classes import MetadataChangeEventClass
|
|
|
|
from datahub.metadata.schemas import getMetadataChangeEventSchema
|
2021-04-13 17:30:24 -07:00
|
|
|
from tests.test_helpers import mce_helpers
|
2021-12-16 23:07:38 -05:00
|
|
|
from tests.test_helpers.click_helpers import run_datahub_cmd
|
2021-06-30 16:53:20 -07:00
|
|
|
from tests.test_helpers.type_helpers import PytestConfig
|
2021-04-14 13:40:24 -07:00
|
|
|
|
2021-07-29 20:04:40 -07:00
|
|
|
# Timestamp handed to freezegun's freeze_time decorator by the tests in this module.
FROZEN_TIME = "2021-07-22 18:54:06"
|
2021-02-11 18:31:15 -08:00
|
|
|
|
2021-07-29 20:04:40 -07:00
|
|
|
|
|
|
|
@freeze_time(FROZEN_TIME)
@pytest.mark.parametrize(
    "json_filename",
    [
        # Baseline case.
        "tests/unit/serde/test_serde_large.json",
        # Chart info's input list must be represented correctly.
        "tests/unit/serde/test_serde_chart_snapshot.json",
        # Usage stats are covered too.
        "tests/unit/serde/test_serde_usage.json",
        # Profiles expressed in the MetadataChangeProposal format.
        "tests/unit/serde/test_serde_profile.json",
    ],
)
def test_serde_to_json(
    pytestconfig: PytestConfig, tmp_path: pathlib.Path, json_filename: str
) -> None:
    """Pipe a JSON file through a file source and file sink, then diff it.

    The input file doubles as the golden file: the pipeline output written
    under ``tmp_path`` is checked against it with
    ``mce_helpers.check_golden_file``.
    """
    output_filename = "output.json"
    golden_file = pytestconfig.rootpath / json_filename
    output_file = tmp_path / output_filename

    # Assemble the recipe piece by piece: file in, file out.
    source_config = {"type": "file", "config": {"filename": str(golden_file)}}
    sink_config = {"type": "file", "config": {"filename": str(output_file)}}
    recipe = {
        "source": source_config,
        "sink": sink_config,
        "run_id": "serde_test",
    }

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.raise_from_status()

    mce_helpers.check_golden_file(
        pytestconfig,
        golden_path=golden_file,
        output_path=f"{tmp_path}/{output_filename}",
    )
|
2021-03-18 02:05:05 -04:00
|
|
|
|
|
|
|
|
2021-04-07 21:30:21 -07:00
|
|
|
@pytest.mark.parametrize(
    "json_filename",
    [
        "tests/unit/serde/test_serde_large.json",
        "tests/unit/serde/test_serde_chart_snapshot.json",
    ],
)
@freeze_time(FROZEN_TIME)
def test_serde_to_avro(pytestconfig: PytestConfig, json_filename: str) -> None:
    """Round-trip MCEs through Avro and check that nothing changes.

    The file is read from JSON into MCE objects, those objects are written to
    an in-memory Avro buffer and read back into MCE objects, and the two lists
    are compared element by element.
    """
    json_path = pytestconfig.rootpath / json_filename
    mces = list(iterate_mce_file(str(json_path)))

    # Serialize to Avro.
    parsed_schema = fastavro.parse_schema(json.loads(getMetadataChangeEventSchema()))
    fo = io.BytesIO()
    out_records = [mce.to_obj(tuples=True) for mce in mces]
    fastavro.writer(fo, parsed_schema, out_records)

    # Deserialize from Avro; rewind the buffer to its start before reading.
    fo.seek(0)
    in_records = list(fastavro.reader(fo, return_record_name=True))
    in_mces = [
        MetadataChangeEventClass.from_obj(record, tuples=True) for record in in_records
    ]

    # Check diff. The length assertion must come first: zip() stops at the
    # shorter list and would otherwise hide missing or extra records.
    assert len(mces) == len(in_mces)
    for index, (expected, actual) in enumerate(zip(mces, in_mces)):
        assert expected == actual, f"MCE #{index} changed after the Avro round trip"
|
2021-04-07 21:30:21 -07:00
|
|
|
|
|
|
|
|
2021-03-18 02:05:05 -04:00
|
|
|
@pytest.mark.parametrize(
    "json_filename",
    [
        # Baseline case.
        "tests/unit/serde/test_serde_large.json",
        # Backwards compatibility with specifying all union types.
        "tests/unit/serde/test_serde_backwards_compat.json",
        # Usage stats.
        "tests/unit/serde/test_serde_usage.json",
        # Profiles expressed in the MetadataChangeProposal format.
        "tests/unit/serde/test_serde_profile.json",
        # The sample MCE files shipped as examples must be valid too.
        "examples/mce_files/single_mce.json",
        "examples/mce_files/mce_list.json",
        "examples/mce_files/bootstrap_mce.json",
    ],
)
@freeze_time(FROZEN_TIME)
def test_check_mce_schema(pytestconfig: PytestConfig, json_filename: str) -> None:
    """Run the ``check mce-file`` CLI subcommand against a valid file.

    The command is invoked through ``run_datahub_cmd``; this test relies on
    that helper to surface a failing command (NOTE(review): confirm against
    the helper's implementation).
    """
    target = pytestconfig.rootpath / json_filename
    command = ["check", "mce-file", str(target)]

    run_datahub_cmd(command)
|
2021-04-16 09:42:52 -07:00
|
|
|
|
|
|
|
|
2021-07-28 14:50:21 -07:00
|
|
|
@pytest.mark.parametrize(
    "json_filename",
    [
        # A record carrying an extra field.
        "tests/unit/serde/test_serde_extra_field.json",
        # A record with fields missing.
        "tests/unit/serde/test_serde_missing_field.json",
    ],
)
def test_check_mce_schema_failure(
    pytestconfig: PytestConfig, json_filename: str
) -> None:
    """Expect ``check_mce_file`` to reject a malformed MCE file.

    Either a ``ValueError`` or an ``AssertionError`` counts as a rejection.
    """
    bad_file = str(pytestconfig.rootpath / json_filename)
    rejection_errors = (ValueError, AssertionError)

    with pytest.raises(rejection_errors):
        check_mce_file(bad_file)
|
|
|
|
|
|
|
|
|
2021-04-16 09:42:52 -07:00
|
|
|
def test_field_discriminator() -> None:
    """Build a cost whose discriminator selects ``costCode`` and validate it."""
    # Inner union value: the discriminator names the populated field.
    cost_value = models.CostCostClass(
        fieldDiscriminator=models.CostCostDiscriminatorClass.costCode,
        costCode="sampleCostCode",
    )
    cost_object = models.CostClass(
        costType=models.CostTypeClass.ORG_COST_TYPE,
        cost=cost_value,
    )

    assert cost_object.validate()
|