mirror of https://github.com/datahub-project/datahub.git (synced 2025-12-28 18:38:17 +00:00)
feat(cli): remove inconsistency check command (#6569)
parent: 6dd6bfc795
commit: 329ecb8958
@@ -5,6 +5,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
 ## Next
 
 ### Breaking Changes
+- The `datahub check graph-consistency` command has been removed. It was a beta command; we decided there are better solutions for this kind of check, so it is being removed.
 
 ### Potential Downtime
 
@@ -4,9 +4,7 @@ import tempfile
 import click
 
 from datahub import __package_name__
-from datahub.cli.cli_utils import get_url_and_token
 from datahub.cli.json_file import check_mce_file
-from datahub.graph_consistency import check_data_platform
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.ingestion.sink.sink_registry import sink_registry
 from datahub.ingestion.source.source_registry import source_registry
@@ -89,9 +87,3 @@ def plugins(verbose: bool) -> None:
     click.echo(
         f"If a plugin is disabled, try running: pip install '{__package_name__}[<plugin>]'"
     )
-
-
-@check.command()
-def graph_consistency() -> None:
-    gms_endpoint, gms_token = get_url_and_token()
-    check_data_platform.check(gms_endpoint, gms_token)
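For context on the hunk above: the removed subcommand was a thin Click wrapper that registered `graph-consistency` on the existing `check` group, resolved the GMS endpoint and token, and handed them to the checker module. The sketch below mirrors that wiring as a standalone script; the `check` group and both helper functions are local stand-ins defined here for illustration, not DataHub APIs.

import click


def get_url_and_token():
    # Stand-in for datahub.cli.cli_utils.get_url_and_token (assumed to return a GMS URL and token).
    return "http://localhost:8080", None


def run_check(gms_endpoint, gms_token):
    # Stand-in for the deleted datahub.graph_consistency.check_data_platform.check.
    print(f"would check graph consistency against {gms_endpoint}")


@click.group()
def check() -> None:
    """Parent `check` command group; the real group hosts other subcommands as well."""


@check.command()
def graph_consistency() -> None:
    """Click derives the `graph-consistency` command name from this function name."""
    gms_endpoint, gms_token = get_url_and_token()
    run_check(gms_endpoint, gms_token)


if __name__ == "__main__":
    check()

Running `python sketch.py graph-consistency` invokes the stand-in checker, which is roughly what `datahub check graph-consistency` did before this commit.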
@@ -1,44 +0,0 @@
-from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
-
-
-def check(gms_endpoint, gms_token):
-    ASPECT_NAME = "dataPlatformInstance"
-    graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint, token=gms_token))
-    search_results = graph.get_search_results()["value"]["metadata"]["aggregations"]
-
-    platform_search_result = list(
-        filter(lambda x: x["displayName"] == "Platform", search_results)
-    )
-
-    expected_total = graph.get_aspect_counts(ASPECT_NAME, "urn:li:dataset%")
-    actual_total = 0
-
-    platform_counts = {}
-    if len(platform_search_result) > 0:
-        platform_aggregation = platform_search_result[0]["filterValues"]
-        for aggregation in platform_aggregation:
-            platform_name = aggregation["entity"]
-            facet_count = aggregation["facetCount"]
-            actual_total += facet_count
-            platform_counts[platform_name] = facet_count
-
-            expected_facet_count = graph.get_aspect_counts(
-                ASPECT_NAME, f"urn:li:dataset%{platform_name}%"
-            )
-            if facet_count != expected_facet_count:
-                missing_percent = round(
-                    (expected_facet_count - facet_count) * 100 / expected_facet_count, 2
-                )
-                print(
-                    f"[WARN {ASPECT_NAME}] Expected to have {expected_facet_count} but found {facet_count} for {platform_name}. Missing % = {missing_percent}"
-                )
-
-    if expected_total != actual_total:
-        missing_percent = round(
-            (expected_total - actual_total) * 100 / expected_total, 2
-        )
-        print(
-            f"[WARN {ASPECT_NAME}] Expected to have {expected_total} but found {actual_total}. Missing % = {missing_percent}"
-        )
-    else:
-        print(f"[SUCCESS {ASPECT_NAME}] Expected and actual are {expected_total}")
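The deleted module above compared two numbers for the `dataPlatformInstance` aspect: the count returned by `get_aspect_counts` for dataset URNs, and the per-platform facet counts pulled from the search aggregation named "Platform", warning whenever the two disagreed. A small illustration of just that comparison arithmetic, with made-up counts (no DataHubGraph calls involved):

ASPECT_NAME = "dataPlatformInstance"


def missing_percent(expected: int, actual: int) -> float:
    # Shortfall of the actual count relative to the expected count, as a percentage.
    return round((expected - actual) * 100 / expected, 2)


# Hypothetical numbers: 1200 aspects expected, 1137 surfaced by the search aggregation.
expected_total, actual_total = 1200, 1137

if expected_total != actual_total:
    print(
        f"[WARN {ASPECT_NAME}] Expected to have {expected_total} but found "
        f"{actual_total}. Missing % = {missing_percent(expected_total, actual_total)}"
    )
else:
    print(f"[SUCCESS {ASPECT_NAME}] Expected and actual are {expected_total}")

With these numbers the script prints a warning with Missing % = 5.25.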