102 lines
2.7 KiB
Python
Raw Normal View History

from typing import Iterable
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.schema_classes import (
StatusClass,
SubTypesClass,
)
from datahub.utilities.urns.dataset_urn import DatasetUrn
def _get_urn() -> str:
    """Return the string form of a fixed dataset URN.

    The URN is created from the ``elasticsearch`` platform id, the
    ``fooIndex`` table name and the ``PROD`` environment.
    """
    dataset_urn = DatasetUrn.create_from_ids(
        platform_id="elasticsearch",
        table_name="fooIndex",
        env="PROD",
    )
    return str(dataset_urn)
class FakeSource(Source):
    """Minimal ``Source`` implementation for exercising ``SourceReport``.

    The source emits one fixed work unit and owns a fresh ``SourceReport``
    (``self.source_report``) that callers can feed and inspect.
    """

    def __init__(self, ctx: PipelineContext):
        """Initialise the base ``Source`` with *ctx* and create an empty report."""
        super().__init__(ctx)
        self.source_report = SourceReport()

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "FakeSource":
        """Build a source from *ctx*; *config_dict* is accepted but not used."""
        # Instantiate through ``cls`` rather than the hard-coded class name so
        # that a subclass calling ``create`` gets an instance of its own type.
        return cls(ctx)

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Return a single work unit carrying a ``StatusClass(removed=False)`` aspect."""
        status_proposal = MetadataChangeProposalWrapper(
            entityUrn=_get_urn(),
            aspect=StatusClass(removed=False),
        )
        return [MetadataWorkUnit(id="test-workunit", mcp=status_proposal)]

    def get_report(self) -> SourceReport:
        """Return the report instance created in ``__init__``."""
        return self.source_report

    def close(self) -> None:
        """Delegate to the base class ``close``."""
        return super().close()
def test_aspects_by_subtypes():
    """Verify the aspects-by-subtypes breakdown before and after a subtype is reported.

    With only the status aspect reported, the dataset's count is expected
    under the ``"unknown"`` key. After a ``SubTypesClass`` aspect naming
    ``"Table"`` is reported for the same URN and stats are recomputed, both
    aspect counts are expected under ``"Table"``.
    """
    source = FakeSource(PipelineContext(run_id="test_aspects_by_subtypes"))
    report = source.source_report

    # Phase 1: only the work units produced by the source itself.
    for work_unit in source.get_workunits_internal():
        report.report_workunit(work_unit)
    report.compute_stats()

    expected_without_subtype = {
        "dataset": {
            "unknown": {"status": 1},
        }
    }
    assert report.get_aspects_by_subtypes_dict() == expected_without_subtype

    # Phase 2: report a subTypes aspect for the same entity and recompute.
    subtype_proposal = MetadataChangeProposalWrapper(
        entityUrn=_get_urn(),
        aspect=SubTypesClass(typeNames=["Table"]),
    )
    report.report_workunit(subtype_proposal.as_workunit())
    report.compute_stats()

    expected_with_subtype = {
        "dataset": {
            "Table": {"status": 1, "subTypes": 1},
        }
    }
    assert report.get_aspects_by_subtypes_dict() == expected_with_subtype
def test_discretize_dict_values():
    """Check ``SourceReport._discretize_dict_values`` on a nested count dict.

    The expected values below are each the largest power of two that does not
    exceed the corresponding input (5 -> 4, 12 -> 8, 3 -> 2, 8 -> 8, 1 -> 1);
    the nesting and keys are expected to be unchanged.
    """
    raw_counts = {
        "dataset": {
            "schemaMetadata": 5,
            "status": 12,
            "ownership": 3,
        },
        "chart": {
            "status": 8,
            "ownership": 1,
        },
    }
    expected_counts = {
        "dataset": {
            "schemaMetadata": 4,
            "status": 8,
            "ownership": 2,
        },
        "chart": {
            "status": 8,
            "ownership": 1,
        },
    }

    discretized = SourceReport._discretize_dict_values(raw_counts)

    assert discretized == expected_counts