feat(cli): add container CLI to apply patches for tag, term, owner (#12418)

This commit is contained in:
Aseem Bansal 2025-01-22 20:41:00 +05:30 committed by GitHub
parent 96758e2eb6
commit b75d3ed5dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 125 additions and 18 deletions

View File

@ -57,24 +57,30 @@ Options:
--help Show this message and exit.
Commands:
actions <disabled due to missing dependencies>
check Helper commands for checking various aspects of DataHub.
dataproduct A group of commands to interact with the DataProduct entity in DataHub.
delete Delete metadata from datahub using a single urn or a combination of filters
docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
exists A group of commands to check existence of entities in DataHub.
get A group of commands to get metadata from DataHub.
group A group of commands to interact with the Group entity in DataHub.
ingest Ingest metadata into DataHub.
init Configure which datahub instance to connect to
lite A group of commands to work with a DataHub Lite instance
migrate Helper commands for migrating metadata within DataHub.
put A group of commands to put metadata in DataHub.
state Managed state stored in DataHub by stateful ingestion.
telemetry Toggle telemetry.
timeline Get timeline for an entity based on certain categories
user A group of commands to interact with the User entity in DataHub.
version Print version number and exit.
actions <disabled due to missing dependencies>
assertions A group of commands to interact with the Assertion entity in DataHub.
check Helper commands for checking various aspects of DataHub.
container A group of commands to interact with containers in DataHub.
datacontract A group of commands to interact with the DataContract entity in DataHub.
dataproduct A group of commands to interact with the DataProduct entity in DataHub.
dataset A group of commands to interact with the Dataset entity in DataHub.
delete Delete metadata from DataHub.
docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
exists A group of commands to check existence of entities in DataHub.
forms A group of commands to interact with forms in DataHub.
get A group of commands to get metadata from DataHub.
group A group of commands to interact with the Group entity in DataHub.
ingest Ingest metadata into DataHub.
init Configure which datahub instance to connect to
lite A group of commands to work with a DataHub Lite instance
migrate Helper commands for migrating metadata within DataHub.
properties A group of commands to interact with structured properties in DataHub.
put A group of commands to put metadata in DataHub.
state Managed state stored in DataHub by stateful ingestion.
telemetry Toggle telemetry.
timeline Get timeline for an entity based on certain categories
user A group of commands to interact with the User entity in DataHub.
version Print version number and exit.
```
The top-level commands listed below are here mainly to give the reader a high-level picture of the kinds of things you can accomplish with the CLI.
@ -274,6 +280,16 @@ DATAHUB_TELEMETRY_TIMEOUT=10
DATAHUB_DEBUG=false
```
### container
A group of commands to interact with containers in DataHub.
For example, you can use this to recursively apply a tag to all datasets in a container.
```shell
datahub container tag --container-urn "urn:li:container:0e9e46bd6d5cf645f33d5a8f0254bc2d" --tag-urn "urn:li:tag:tag1"
```
### check
The datahub package is composed of different plugins that allow you to connect to different metadata sources and ingest metadata from them.

View File

@ -0,0 +1,89 @@
import logging
from typing import List
import click
from datahub.ingestion.graph.client import get_default_graph
from datahub.metadata.schema_classes import (
GlossaryTermAssociationClass,
OwnerClass,
OwnershipTypeClass,
TagAssociationClass,
)
from datahub.specific.dataset import DatasetPatchBuilder
logger = logging.getLogger(__name__)
# Root of the `container` sub-commands; the individual commands attach
# themselves to this group via `@container.command()`.
@click.group()
def container() -> None:
    """A group of commands to interact with containers in DataHub."""
def apply_association_to_container(
    container_urn: str,
    association_urn: str,
    association_type: str,
) -> None:
    """
    Common function to add either tags, terms, or owners to child datasets (for now).

    Fetches the dataset URNs filtered by the given container from the default
    graph client, then builds a dataset patch for each URN and emits every
    resulting change proposal. Owners are added with the TECHNICAL_OWNER
    ownership type.

    Args:
        container_urn: The URN of the container
        association_urn: The URN of the tag, term, or user to apply
        association_type: One of 'tag', 'term', or 'owner'

    Raises:
        ValueError: If association_type is not one of the supported values.
    """
    supported_types = ("tag", "term", "owner")
    # Fail fast, before connecting to the server: an unknown type would
    # otherwise leave `patches` unbound and crash inside the loop below.
    if association_type not in supported_types:
        raise ValueError(
            f"Unsupported association type {association_type!r}; "
            f"expected one of: {', '.join(supported_types)}"
        )

    graph = get_default_graph()
    logger.info(f"Using {graph}")

    # Collect the full URN list up front so patches are only emitted once
    # the lookup has finished.
    urns: List[str] = list(
        graph.get_urns_by_filter(
            container=container_urn, batch_size=1000, entity_types=["dataset"]
        )
    )

    for urn in urns:
        logger.info(f"Adding {association_type} {association_urn} to {urn}")
        builder = DatasetPatchBuilder(urn)
        if association_type == "tag":
            patches = builder.add_tag(TagAssociationClass(association_urn)).build()
        elif association_type == "term":
            patches = builder.add_term(
                GlossaryTermAssociationClass(association_urn)
            ).build()
        else:
            # Only "owner" can reach this branch thanks to the validation above.
            patches = builder.add_owner(
                OwnerClass(
                    owner=association_urn,
                    type=OwnershipTypeClass.TECHNICAL_OWNER,
                )
            ).build()
        for mcp in patches:
            graph.emit(mcp)
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--tag-urn", required=True, type=str)
def tag(container_urn: str, tag_urn: str) -> None:
    """Add patch to add a tag to all datasets in a container"""
    # Thin CLI wrapper: all the work happens in the shared helper.
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=tag_urn,
        association_type="tag",
    )
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--term-urn", required=True, type=str)
def term(container_urn: str, term_urn: str) -> None:
    """Add patch to add a term to all datasets in a container"""
    # Thin CLI wrapper: all the work happens in the shared helper.
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=term_urn,
        association_type="term",
    )
@container.command()
@click.option("--container-urn", required=True, type=str)
# The value is passed unchanged as the `owner` field of the ownership patch,
# exactly like the URNs taken by the sibling `tag`/`term` commands, so
# `--owner-urn` is the primary name. `--owner-id` is kept as an alias so
# existing invocations keep working; both bind to the `owner_id` parameter.
@click.option(
    "--owner-urn",
    "--owner-id",
    "owner_id",
    required=True,
    type=str,
    help="URN of the owner to add.",
)
def owner(container_urn: str, owner_id: str) -> None:
    """Add patch to add an owner to all datasets in a container"""
    apply_association_to_container(container_urn, owner_id, "owner")

View File

@ -14,6 +14,7 @@ from datahub.cli.cli_utils import (
make_shim_command,
)
from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
from datahub.cli.container_cli import container
from datahub.cli.delete_cli import delete
from datahub.cli.docker_cli import docker
from datahub.cli.env_utils import get_boolean_env_variable
@ -180,6 +181,7 @@ datahub.add_command(properties)
datahub.add_command(forms)
datahub.add_command(datacontract)
datahub.add_command(assertions)
datahub.add_command(container)
try:
from datahub.cli.lite_cli import lite