mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-07 16:57:52 +00:00
102 lines
2.7 KiB
Python
102 lines
2.7 KiB
Python
from typing import Iterable
|
|
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
from datahub.ingestion.api.common import PipelineContext
|
|
from datahub.ingestion.api.source import Source, SourceReport
|
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
from datahub.metadata.schema_classes import (
|
|
StatusClass,
|
|
SubTypesClass,
|
|
)
|
|
from datahub.utilities.urns.dataset_urn import DatasetUrn
|
|
|
|
|
|
def _get_urn() -> str:
|
|
return str(
|
|
DatasetUrn.create_from_ids(
|
|
platform_id="elasticsearch",
|
|
table_name="fooIndex",
|
|
env="PROD",
|
|
)
|
|
)
|
|
|
|
|
|
class FakeSource(Source):
|
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
return [
|
|
MetadataWorkUnit(
|
|
id="test-workunit",
|
|
mcp=MetadataChangeProposalWrapper(
|
|
entityUrn=_get_urn(),
|
|
aspect=StatusClass(removed=False),
|
|
),
|
|
)
|
|
]
|
|
|
|
def __init__(self, ctx: PipelineContext):
|
|
super().__init__(ctx)
|
|
self.source_report = SourceReport()
|
|
|
|
@classmethod
|
|
def create(cls, config_dict: dict, ctx: PipelineContext) -> "FakeSource":
|
|
return FakeSource(ctx)
|
|
|
|
def get_report(self) -> SourceReport:
|
|
return self.source_report
|
|
|
|
def close(self) -> None:
|
|
return super().close()
|
|
|
|
|
|
def test_aspects_by_subtypes():
|
|
source = FakeSource(PipelineContext(run_id="test_aspects_by_subtypes"))
|
|
for wu in source.get_workunits_internal():
|
|
source.source_report.report_workunit(wu)
|
|
|
|
source.source_report.compute_stats()
|
|
assert source.source_report.get_aspects_by_subtypes_dict() == {
|
|
"dataset": {
|
|
"unknown": {"status": 1},
|
|
}
|
|
}
|
|
source.source_report.report_workunit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=_get_urn(),
|
|
aspect=SubTypesClass(typeNames=["Table"]),
|
|
).as_workunit()
|
|
)
|
|
source.source_report.compute_stats()
|
|
assert source.source_report.get_aspects_by_subtypes_dict() == {
|
|
"dataset": {
|
|
"Table": {"status": 1, "subTypes": 1},
|
|
}
|
|
}
|
|
|
|
|
|
def test_discretize_dict_values():
|
|
"""Test the _discretize_dict_values static method."""
|
|
test_dict = {
|
|
"dataset": {
|
|
"schemaMetadata": 5,
|
|
"status": 12,
|
|
"ownership": 3,
|
|
},
|
|
"chart": {
|
|
"status": 8,
|
|
"ownership": 1,
|
|
},
|
|
}
|
|
|
|
result = SourceReport._discretize_dict_values(test_dict)
|
|
assert result == {
|
|
"dataset": {
|
|
"schemaMetadata": 4,
|
|
"status": 8,
|
|
"ownership": 2,
|
|
},
|
|
"chart": {
|
|
"status": 8,
|
|
"ownership": 1,
|
|
},
|
|
}
|