mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-25 00:48:45 +00:00
feat(cli): add container CLI to apply patches for tag, term, owner (#12418)
This commit is contained in:
parent
96758e2eb6
commit
b75d3ed5dc
52
docs/cli.md
52
docs/cli.md
@ -57,24 +57,30 @@ Options:
|
||||
--help Show this message and exit.
|
||||
|
||||
Commands:
|
||||
actions <disabled due to missing dependencies>
|
||||
check Helper commands for checking various aspects of DataHub.
|
||||
dataproduct A group of commands to interact with the DataProduct entity in DataHub.
|
||||
delete Delete metadata from datahub using a single urn or a combination of filters
|
||||
docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
|
||||
exists A group of commands to check existence of entities in DataHub.
|
||||
get A group of commands to get metadata from DataHub.
|
||||
group A group of commands to interact with the Group entity in DataHub.
|
||||
ingest Ingest metadata into DataHub.
|
||||
init Configure which datahub instance to connect to
|
||||
lite A group of commands to work with a DataHub Lite instance
|
||||
migrate Helper commands for migrating metadata within DataHub.
|
||||
put A group of commands to put metadata in DataHub.
|
||||
state Managed state stored in DataHub by stateful ingestion.
|
||||
telemetry Toggle telemetry.
|
||||
timeline Get timeline for an entity based on certain categories
|
||||
user A group of commands to interact with the User entity in DataHub.
|
||||
version Print version number and exit.
|
||||
actions <disabled due to missing dependencies>
|
||||
assertions A group of commands to interact with the Assertion entity in DataHub.
|
||||
check Helper commands for checking various aspects of DataHub.
|
||||
container A group of commands to interact with containers in DataHub.
|
||||
datacontract A group of commands to interact with the DataContract entity in DataHub.
|
||||
dataproduct A group of commands to interact with the DataProduct entity in DataHub.
|
||||
dataset A group of commands to interact with the Dataset entity in DataHub.
|
||||
delete Delete metadata from DataHub.
|
||||
docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
|
||||
exists A group of commands to check existence of entities in DataHub.
|
||||
forms A group of commands to interact with forms in DataHub.
|
||||
get A group of commands to get metadata from DataHub.
|
||||
group A group of commands to interact with the Group entity in DataHub.
|
||||
ingest Ingest metadata into DataHub.
|
||||
init Configure which datahub instance to connect to
|
||||
lite A group of commands to work with a DataHub Lite instance
|
||||
migrate Helper commands for migrating metadata within DataHub.
|
||||
properties A group of commands to interact with structured properties in DataHub.
|
||||
put A group of commands to put metadata in DataHub.
|
||||
state Managed state stored in DataHub by stateful ingestion.
|
||||
telemetry Toggle telemetry.
|
||||
timeline Get timeline for an entity based on certain categories
|
||||
user A group of commands to interact with the User entity in DataHub.
|
||||
version Print version number and exit.
|
||||
```
|
||||
|
||||
The top-level commands listed below are here mainly to give the reader a high-level picture of the kinds of things you can accomplish with the CLI.
|
||||
@ -274,6 +280,16 @@ DATAHUB_TELEMETRY_TIMEOUT=10
|
||||
DATAHUB_DEBUG=false
|
||||
```
|
||||
|
||||
### container
|
||||
|
||||
A group of commands to interact with containers in DataHub.
|
||||
|
||||
For example, you can use this to recursively apply a tag to all datasets in a container.
|
||||
```shell
|
||||
datahub container tag --container-urn "urn:li:container:0e9e46bd6d5cf645f33d5a8f0254bc2d" --tag-urn "urn:li:tag:tag1"
|
||||
|
||||
```
|
||||
|
||||
### check
|
||||
|
||||
The datahub package is composed of different plugins that allow you to connect to different metadata sources and ingest metadata from them.
|
||||
|
||||
89
metadata-ingestion/src/datahub/cli/container_cli.py
Normal file
89
metadata-ingestion/src/datahub/cli/container_cli.py
Normal file
@ -0,0 +1,89 @@
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
import click
|
||||
|
||||
from datahub.ingestion.graph.client import get_default_graph
|
||||
from datahub.metadata.schema_classes import (
|
||||
GlossaryTermAssociationClass,
|
||||
OwnerClass,
|
||||
OwnershipTypeClass,
|
||||
TagAssociationClass,
|
||||
)
|
||||
from datahub.specific.dataset import DatasetPatchBuilder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Root of the `datahub container ...` command family; the subcommands below
# attach themselves via `@container.command()`.
@click.group()
def container() -> None:
    """A group of commands to interact with containers in DataHub."""
|
||||
|
||||
|
||||
def apply_association_to_container(
    container_urn: str,
    association_urn: str,
    association_type: str,
) -> None:
    """
    Common function to add either tags, terms, or owners to child datasets (for now).

    Queries the default graph client for the dataset URNs filtered by the given
    container, builds a dataset patch for each of them and emits every
    resulting change proposal through the same client.

    Args:
        container_urn: The URN of the container
        association_urn: The URN of the tag, term, or user to apply
        association_type: One of 'tag', 'term', or 'owner'

    Raises:
        ValueError: If association_type is not one of the supported values.
    """
    # Reject unsupported types before talking to the server. Without this
    # check an unknown type would leave `patches` unbound inside the loop and
    # fail with an UnboundLocalError only after the search had already run
    # (or pass silently when the container has no datasets).
    supported_types = ("tag", "term", "owner")
    if association_type not in supported_types:
        raise ValueError(
            f"Unsupported association_type {association_type!r}; "
            f"expected one of {supported_types}"
        )

    graph = get_default_graph()
    logger.info(f"Using {graph}")

    # Collect all matching URNs up front so the search is finished before any
    # patch is emitted.
    urns: List[str] = list(
        graph.get_urns_by_filter(
            container=container_urn, batch_size=1000, entity_types=["dataset"]
        )
    )

    for urn in urns:
        logger.info(f"Adding {association_type} {association_urn} to {urn}")
        builder = DatasetPatchBuilder(urn)

        if association_type == "tag":
            patches = builder.add_tag(TagAssociationClass(association_urn)).build()
        elif association_type == "term":
            patches = builder.add_term(
                GlossaryTermAssociationClass(association_urn)
            ).build()
        else:
            # Only "owner" can reach this branch thanks to the validation
            # above. Owners are always added as technical owners.
            patches = builder.add_owner(
                OwnerClass(
                    owner=association_urn,
                    type=OwnershipTypeClass.TECHNICAL_OWNER,
                )
            ).build()

        for mcp in patches:
            graph.emit(mcp)
|
||||
|
||||
|
||||
# `datahub container tag`: the docstring below doubles as the command's
# --help text, so it is kept short.
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--tag-urn", required=True, type=str)
def tag(container_urn: str, tag_urn: str) -> None:
    """Add patch to add a tag to all datasets in a container"""
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=tag_urn,
        association_type="tag",
    )
|
||||
|
||||
|
||||
# `datahub container term`: the docstring below doubles as the command's
# --help text, so it is kept short.
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--term-urn", required=True, type=str)
def term(container_urn: str, term_urn: str) -> None:
    """Add patch to add a term to all datasets in a container"""
    apply_association_to_container(
        container_urn=container_urn,
        association_urn=term_urn,
        association_type="term",
    )
|
||||
|
||||
|
||||
# `datahub container owner`: the docstring below doubles as the command's
# --help text, so it is kept short.
@container.command()
@click.option("--container-urn", required=True, type=str)
@click.option("--owner-id", required=True, type=str)
def owner(container_urn: str, owner_id: str) -> None:
    """Add patch to add an owner to all datasets in a container"""
    # The value ends up verbatim in the ownership patch, which needs a URN.
    # Accept both a full URN (passed through untouched) and a bare user id,
    # which is expanded to a corpuser URN.
    # NOTE(review): assumes a bare id always denotes a user, not a group;
    # groups must be passed as a full URN. Confirm against intended CLI usage.
    if owner_id.startswith("urn:li:"):
        owner_urn = owner_id
    else:
        owner_urn = f"urn:li:corpuser:{owner_id}"
    apply_association_to_container(container_urn, owner_urn, "owner")
|
||||
@ -14,6 +14,7 @@ from datahub.cli.cli_utils import (
|
||||
make_shim_command,
|
||||
)
|
||||
from datahub.cli.config_utils import DATAHUB_CONFIG_PATH, write_gms_config
|
||||
from datahub.cli.container_cli import container
|
||||
from datahub.cli.delete_cli import delete
|
||||
from datahub.cli.docker_cli import docker
|
||||
from datahub.cli.env_utils import get_boolean_env_variable
|
||||
@ -180,6 +181,7 @@ datahub.add_command(properties)
|
||||
datahub.add_command(forms)
|
||||
datahub.add_command(datacontract)
|
||||
datahub.add_command(assertions)
|
||||
datahub.add_command(container)
|
||||
|
||||
try:
|
||||
from datahub.cli.lite_cli import lite
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user