docs(sdk): improve sdk examples + docs (#14507)

This commit is contained in:
Harshal Sheth 2025-08-21 10:22:08 -07:00 committed by GitHub
parent 2cad5ddcb3
commit 653952e714
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 22 additions and 36 deletions

View File

@@ -1,13 +1,11 @@
from datahub.emitter.mcp_builder import ContainerKey
from datahub.emitter.mcp_builder import DatabaseKey
from datahub.sdk import Container, DataHubClient
client = DataHubClient.from_env()
# datajob will inherit the platform and platform instance from the flow
container = Container(
container_key=ContainerKey(platform="mlflow", name="airline_forecast_experiment"),
display_name="Airline Forecast Experiment",
container_key=DatabaseKey(platform="snowflake", database="my_database"),
display_name="MY_DATABASE",
)
client.entities.upsert(container)

View File

@@ -5,10 +5,10 @@ client = DataHubClient.from_env()
dataset = client.entities.get(DatasetUrn(platform="snowflake", name="example_dataset"))
# if you don't know the domain id, you can get it from resolve client by name
# If you don't know the domain urn, you can look it up:
# domain_urn = client.resolve.domain(name="marketing")
# NOTE : This will overwrite the existing domain
# NOTE: This will overwrite the existing domain
dataset.set_domain(DomainUrn(id="marketing"))
client.entities.update(dataset)

View File

@@ -7,4 +7,8 @@ dataset = client.entities.get(
)
dataset.add_term(GlossaryTermUrn("Classification.HighlyConfidential"))
# Or, if you know the term name but not the term urn:
term_urn = client.resolve.term(name="PII")
dataset.add_term(term_urn)
client.entities.update(dataset)

View File

@@ -165,11 +165,7 @@ class LineageClient:
] = False,
transformation_text: Optional[str] = None,
) -> None:
...
"""
Add dataset-to-dataset lineage with column-level mapping.
"""
"""Add dataset-to-dataset lineage with column-level mapping."""
@overload
def add_lineage(
@@ -178,11 +174,7 @@ class LineageClient:
upstream: Union[DatajobUrnOrStr],
downstream: DatasetUrnOrStr,
) -> None:
...
"""
Add dataset-to-datajob or dataset-to-mlmodel lineage.
"""
"""Add dataset-to-datajob or dataset-to-mlmodel lineage."""
@overload
def add_lineage(
@@ -191,11 +183,7 @@ class LineageClient:
upstream: Union[DatasetUrnOrStr, DatajobUrnOrStr],
downstream: DatajobUrnOrStr,
) -> None:
...
"""
Add datajob-to-dataset or datajob-to-datajob lineage.
"""
"""Add datajob-to-dataset or datajob-to-datajob lineage."""
@overload
def add_lineage(
@@ -204,11 +192,7 @@ class LineageClient:
upstream: Union[DashboardUrnOrStr, DatasetUrnOrStr, ChartUrnOrStr],
downstream: DashboardUrnOrStr,
) -> None:
...
"""
Add dashboard-to-dashboard or dashboard-to-dataset lineage.
"""
"""Add dashboard-to-dashboard or dashboard-to-dataset lineage."""
@overload
def add_lineage(
@@ -217,10 +201,7 @@ class LineageClient:
upstream: DatasetUrnOrStr,
downstream: ChartUrnOrStr,
) -> None:
...
"""
Add dataset-to-chart lineage.
"""
"""Add dataset-to-chart lineage."""
# The actual implementation that handles all overloaded cases
def add_lineage(
@@ -237,8 +218,7 @@ class LineageClient:
] = False,
transformation_text: Optional[str] = None,
) -> None:
"""
Add lineage between two entities.
"""Add lineage between two entities.
This flexible method handles different combinations of entity types:
- dataset to dataset

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Optional, overload
from typing import TYPE_CHECKING, Optional, overload
from datahub.errors import SdkUsageError
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
@@ -9,6 +9,9 @@ from datahub.sdk.entity_client import EntityClient
from datahub.sdk.lineage_client import LineageClient
from datahub.sdk.search_client import SearchClient
if TYPE_CHECKING:
from datahub.sdk.resolver_client import ResolverClient
class DataHubClient:
"""Main client for interacting with DataHub.
@@ -104,13 +107,14 @@ class DataHubClient:
return EntityClient(self)
@property
def resolve(self): # type: ignore[report-untyped-call] # Not available due to circular import issues
def resolve(self) -> "ResolverClient":
try:
from acryl_datahub_cloud.sdk import ( # type: ignore[import-not-found]
ResolverClient,
)
except ImportError:
from datahub.sdk.resolver_client import ( # type: ignore[assignment] # If the client is not installed, use the one from the SDK
# If the client is not installed, use the one from the SDK.
from datahub.sdk.resolver_client import ( # type: ignore[assignment]
ResolverClient,
)
return ResolverClient(self)