feat(cli): remove inconsistency check command (#6569)

This commit is contained in:
Aseem Bansal 2022-11-30 02:53:21 +05:30 committed by GitHub
parent 6dd6bfc795
commit 329ecb8958
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1 additions and 52 deletions

View File

@ -5,6 +5,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
## Next
### Breaking Changes
- The `datahub check graph-consistency` command has been removed. It was a beta API that we had considered, but we decided there are better solutions for this problem, so the command is being removed.
### Potential Downtime

View File

@ -4,9 +4,7 @@ import tempfile
import click
from datahub import __package_name__
from datahub.cli.cli_utils import get_url_and_token
from datahub.cli.json_file import check_mce_file
from datahub.graph_consistency import check_data_platform
from datahub.ingestion.run.pipeline import Pipeline
from datahub.ingestion.sink.sink_registry import sink_registry
from datahub.ingestion.source.source_registry import source_registry
@ -89,9 +87,3 @@ def plugins(verbose: bool) -> None:
click.echo(
f"If a plugin is disabled, try running: pip install '{__package_name__}[<plugin>]'"
)
@check.command()
def graph_consistency() -> None:
    # Deliberately documented with comments instead of a docstring: click
    # uses a command's docstring as its help text, so adding one would
    # change what `--help` prints for this command.
    #
    # Resolve the GMS connection details and hand them to the
    # data-platform consistency check, which reports its findings itself.
    endpoint, token = get_url_and_token()
    check_data_platform.check(endpoint, token)

View File

@ -1,44 +0,0 @@
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
def _format_missing_percent(expected, actual) -> str:
    """Return how much of ``expected`` is absent from ``actual``, as text.

    The value is ``(expected - actual) * 100 / expected`` rounded to two
    decimal places. When ``expected`` is zero the percentage is undefined,
    so ``"n/a"`` is returned instead of raising ``ZeroDivisionError``.
    """
    if expected == 0:
        return "n/a"
    return str(round((expected - actual) * 100 / expected, 2))


def check(gms_endpoint, gms_token) -> None:
    """Compare "Platform" facet counts from search with stored aspect counts.

    Builds a ``DataHubGraph`` client from the given endpoint and token, reads
    the aggregation whose ``displayName`` is ``"Platform"`` from the search
    results, and for every entry compares its ``facetCount`` with
    ``graph.get_aspect_counts`` for the ``dataPlatformInstance`` aspect.
    A ``[WARN ...]`` line is printed for each platform whose counts differ.
    Finally the summed facet counts are compared with the aspect count for
    ``urn:li:dataset%`` and either a ``[WARN ...]`` or a ``[SUCCESS ...]``
    line is printed.

    Args:
        gms_endpoint: Passed as ``server`` to ``DatahubClientConfig``.
        gms_token: Passed as ``token`` to ``DatahubClientConfig``.

    Returns:
        None. All findings are reported via ``print``.
    """
    ASPECT_NAME = "dataPlatformInstance"
    graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint, token=gms_token))

    aggregations = graph.get_search_results()["value"]["metadata"]["aggregations"]
    # Only the first aggregation named "Platform" is used; None if absent.
    platform_aggregation = next(
        (entry for entry in aggregations if entry["displayName"] == "Platform"),
        None,
    )

    expected_total = graph.get_aspect_counts(ASPECT_NAME, "urn:li:dataset%")
    actual_total = 0

    # With no "Platform" aggregation there is nothing to compare per
    # platform; the totals check below still runs with actual_total == 0.
    facets = [] if platform_aggregation is None else platform_aggregation["filterValues"]
    for facet in facets:
        platform_name = facet["entity"]
        facet_count = facet["facetCount"]
        actual_total += facet_count
        expected_facet_count = graph.get_aspect_counts(
            ASPECT_NAME, f"urn:li:dataset%{platform_name}%"
        )
        if facet_count != expected_facet_count:
            # The helper guards against expected_facet_count == 0, which
            # previously crashed the whole check with ZeroDivisionError.
            missing_percent = _format_missing_percent(
                expected_facet_count, facet_count
            )
            print(
                f"[WARN {ASPECT_NAME}] Expected to have {expected_facet_count} but found {facet_count} for {platform_name}. Missing % = {missing_percent}"
            )

    if expected_total != actual_total:
        # Same zero guard for the overall totals.
        missing_percent = _format_missing_percent(expected_total, actual_total)
        print(
            f"[WARN {ASPECT_NAME}] Expected to have {expected_total} but found {actual_total}. Missing % = {missing_percent}"
        )
    else:
        print(f"[SUCCESS {ASPECT_NAME}] Expected and actual are {expected_total}")