102 lines
2.7 KiB
Python

from typing import Iterable
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.schema_classes import (
StatusClass,
SubTypesClass,
)
from datahub.utilities.urns.dataset_urn import DatasetUrn
def _get_urn() -> str:
    """Return the string form of the fixed dataset URN shared by these tests.

    The URN is built for platform ``elasticsearch``, table ``fooIndex`` and
    environment ``PROD``.
    """
    dataset_urn = DatasetUrn.create_from_ids(
        platform_id="elasticsearch",
        table_name="fooIndex",
        env="PROD",
    )
    return str(dataset_urn)
class FakeSource(Source):
    """Minimal ``Source`` stub that produces one status work unit for the test URN."""

    def __init__(self, ctx: PipelineContext):
        super().__init__(ctx)
        # Report instance the tests feed work units into and read stats from.
        self.source_report = SourceReport()

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "FakeSource":
        """Build a ``FakeSource`` from ``ctx``; ``config_dict`` is not used."""
        return FakeSource(ctx)

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Return a one-element list holding a ``status`` work unit."""
        status_proposal = MetadataChangeProposalWrapper(
            entityUrn=_get_urn(),
            aspect=StatusClass(removed=False),
        )
        status_workunit = MetadataWorkUnit(
            id="test-workunit",
            mcp=status_proposal,
        )
        return [status_workunit]

    def get_report(self) -> SourceReport:
        """Return the report created in ``__init__``."""
        return self.source_report

    def close(self) -> None:
        """Delegate shutdown to the base ``Source``."""
        return super().close()
def test_aspects_by_subtypes():
    """Check that aspect counts are grouped by entity type and subtype.

    With only a ``status`` aspect reported, the dataset is counted under the
    ``unknown`` subtype. After a ``subTypes`` aspect naming ``Table`` is
    reported for the same URN, both aspects are counted under ``Table``.
    """
    source = FakeSource(PipelineContext(run_id="test_aspects_by_subtypes"))
    report = source.source_report

    # Phase 1: only the status aspect has been seen.
    for workunit in source.get_workunits_internal():
        report.report_workunit(workunit)
    report.compute_stats()

    expected_before_subtype = {
        "dataset": {
            "unknown": {"status": 1},
        }
    }
    assert report.get_aspects_by_subtypes_dict() == expected_before_subtype

    # Phase 2: the same entity now declares its subtype.
    subtype_proposal = MetadataChangeProposalWrapper(
        entityUrn=_get_urn(),
        aspect=SubTypesClass(typeNames=["Table"]),
    )
    report.report_workunit(subtype_proposal.as_workunit())
    report.compute_stats()

    expected_after_subtype = {
        "dataset": {
            "Table": {"status": 1, "subTypes": 1},
        }
    }
    assert report.get_aspects_by_subtypes_dict() == expected_after_subtype
def test_discretize_dict_values():
    """Exercise ``SourceReport._discretize_dict_values`` on a nested count dict.

    Each expected value below is the largest power of two that does not
    exceed the corresponding input count (e.g. 5 -> 4, 12 -> 8, 8 -> 8).
    """
    raw_counts = {
        "dataset": {
            "schemaMetadata": 5,
            "status": 12,
            "ownership": 3,
        },
        "chart": {
            "status": 8,
            "ownership": 1,
        },
    }
    expected_counts = {
        "dataset": {
            "schemaMetadata": 4,
            "status": 8,
            "ownership": 2,
        },
        "chart": {
            "status": 8,
            "ownership": 1,
        },
    }

    discretized = SourceReport._discretize_dict_values(raw_counts)

    assert discretized == expected_counts