mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-27 01:48:24 +00:00
docs(sdk): improve sdk examples + docs (#14507)
This commit is contained in:
parent
2cad5ddcb3
commit
653952e714
@ -1,13 +1,11 @@
|
||||
from datahub.emitter.mcp_builder import ContainerKey
|
||||
from datahub.emitter.mcp_builder import DatabaseKey
|
||||
from datahub.sdk import Container, DataHubClient
|
||||
|
||||
client = DataHubClient.from_env()
|
||||
|
||||
# datajob will inherit the platform and platform instance from the flow
|
||||
|
||||
container = Container(
|
||||
container_key=ContainerKey(platform="mlflow", name="airline_forecast_experiment"),
|
||||
display_name="Airline Forecast Experiment",
|
||||
container_key=DatabaseKey(platform="snowflake", database="my_database"),
|
||||
display_name="MY_DATABASE",
|
||||
)
|
||||
|
||||
client.entities.upsert(container)
|
||||
|
||||
@ -5,10 +5,10 @@ client = DataHubClient.from_env()
|
||||
|
||||
dataset = client.entities.get(DatasetUrn(platform="snowflake", name="example_dataset"))
|
||||
|
||||
# if you don't know the domain id, you can get it from resolve client by name
|
||||
# If you don't know the domain urn, you can look it up:
|
||||
# domain_urn = client.resolve.domain(name="marketing")
|
||||
|
||||
# NOTE : This will overwrite the existing domain
|
||||
# NOTE: This will overwrite the existing domain
|
||||
dataset.set_domain(DomainUrn(id="marketing"))
|
||||
|
||||
client.entities.update(dataset)
|
||||
|
||||
@ -7,4 +7,8 @@ dataset = client.entities.get(
|
||||
)
|
||||
dataset.add_term(GlossaryTermUrn("Classification.HighlyConfidential"))
|
||||
|
||||
# Or, if you know the term name but not the term urn:
|
||||
term_urn = client.resolve.term(name="PII")
|
||||
dataset.add_term(term_urn)
|
||||
|
||||
client.entities.update(dataset)
|
||||
|
||||
@ -165,11 +165,7 @@ class LineageClient:
|
||||
] = False,
|
||||
transformation_text: Optional[str] = None,
|
||||
) -> None:
|
||||
...
|
||||
|
||||
"""
|
||||
Add dataset-to-dataset lineage with column-level mapping.
|
||||
"""
|
||||
"""Add dataset-to-dataset lineage with column-level mapping."""
|
||||
|
||||
@overload
|
||||
def add_lineage(
|
||||
@ -178,11 +174,7 @@ class LineageClient:
|
||||
upstream: Union[DatajobUrnOrStr],
|
||||
downstream: DatasetUrnOrStr,
|
||||
) -> None:
|
||||
...
|
||||
|
||||
"""
|
||||
Add dataset-to-datajob or dataset-to-mlmodel lineage.
|
||||
"""
|
||||
"""Add dataset-to-datajob or dataset-to-mlmodel lineage."""
|
||||
|
||||
@overload
|
||||
def add_lineage(
|
||||
@ -191,11 +183,7 @@ class LineageClient:
|
||||
upstream: Union[DatasetUrnOrStr, DatajobUrnOrStr],
|
||||
downstream: DatajobUrnOrStr,
|
||||
) -> None:
|
||||
...
|
||||
|
||||
"""
|
||||
Add datajob-to-dataset or datajob-to-datajob lineage.
|
||||
"""
|
||||
"""Add datajob-to-dataset or datajob-to-datajob lineage."""
|
||||
|
||||
@overload
|
||||
def add_lineage(
|
||||
@ -204,11 +192,7 @@ class LineageClient:
|
||||
upstream: Union[DashboardUrnOrStr, DatasetUrnOrStr, ChartUrnOrStr],
|
||||
downstream: DashboardUrnOrStr,
|
||||
) -> None:
|
||||
...
|
||||
|
||||
"""
|
||||
Add dashboard-to-dashboard or dashboard-to-dataset lineage.
|
||||
"""
|
||||
"""Add dashboard-to-dashboard or dashboard-to-dataset lineage."""
|
||||
|
||||
@overload
|
||||
def add_lineage(
|
||||
@ -217,10 +201,7 @@ class LineageClient:
|
||||
upstream: DatasetUrnOrStr,
|
||||
downstream: ChartUrnOrStr,
|
||||
) -> None:
|
||||
...
|
||||
"""
|
||||
Add dataset-to-chart lineage.
|
||||
"""
|
||||
"""Add dataset-to-chart lineage."""
|
||||
|
||||
# The actual implementation that handles all overloaded cases
|
||||
def add_lineage(
|
||||
@ -237,8 +218,7 @@ class LineageClient:
|
||||
] = False,
|
||||
transformation_text: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Add lineage between two entities.
|
||||
"""Add lineage between two entities.
|
||||
|
||||
This flexible method handles different combinations of entity types:
|
||||
- dataset to dataset
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, overload
|
||||
from typing import TYPE_CHECKING, Optional, overload
|
||||
|
||||
from datahub.errors import SdkUsageError
|
||||
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
||||
@ -9,6 +9,9 @@ from datahub.sdk.entity_client import EntityClient
|
||||
from datahub.sdk.lineage_client import LineageClient
|
||||
from datahub.sdk.search_client import SearchClient
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datahub.sdk.resolver_client import ResolverClient
|
||||
|
||||
|
||||
class DataHubClient:
|
||||
"""Main client for interacting with DataHub.
|
||||
@ -104,13 +107,14 @@ class DataHubClient:
|
||||
return EntityClient(self)
|
||||
|
||||
@property
|
||||
def resolve(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
|
||||
def resolve(self) -> "ResolverClient":
|
||||
try:
|
||||
from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
|
||||
ResolverClient,
|
||||
)
|
||||
except ImportError:
|
||||
from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
|
||||
# If the client is not installed, use the one from the SDK.
|
||||
from datahub.sdk.resolver_client import ( # type: ignore[assignment]
|
||||
ResolverClient,
|
||||
)
|
||||
return ResolverClient(self)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user