feat(ingest): add urn -> url helpers (#13410)

This commit is contained in:
Harshal Sheth 2025-05-02 19:54:01 -07:00 committed by GitHub
parent 24f9bc0f18
commit f83460255a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 3 deletions

View File

@ -147,14 +147,14 @@ great_expectations_lib = {
"acryl-great-expectations==0.15.50.1",
}
sql_common_slim = {
sqlalchemy_lib = {
# Required for all SQL sources.
# This is a temporary lower bound that we're open to loosening/tightening as requirements show up
"sqlalchemy>=1.4.39, <2",
}
sql_common = (
{
*sql_common_slim,
*sqlalchemy_lib,
# Required for SQL profiling.
*great_expectations_lib,
"pydantic<2", # keeping this for now, but can be removed eventually
@ -551,7 +551,7 @@ plugins: Dict[str, Set[str]] = {
"unity-catalog": databricks | sql_common,
# databricks is alias for unity-catalog and needs to be kept in sync
"databricks": databricks | sql_common,
"fivetran": snowflake_common | bigquery_common | sqlglot_lib,
"fivetran": snowflake_common | bigquery_common | sqlalchemy_lib | sqlglot_lib,
"qlik-sense": sqlglot_lib | {"requests", "websocket-client"},
"sigma": sqlglot_lib | {"requests"},
"sac": sac,

View File

@ -54,6 +54,7 @@ from datahub.ingestion.graph.filters import (
RemovedStatusFilter,
generate_filter,
)
from datahub.ingestion.graph.links import make_url_for_urn
from datahub.ingestion.source.state.checkpoint import Checkpoint
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
MetadataChangeEvent,
@ -187,6 +188,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
"""Get the public-facing base url of the frontend
This url can be used to construct links to the frontend. The url will not include a trailing slash.
Note: Only supported with DataHub Cloud.
"""
@ -198,6 +200,20 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
raise ValueError("baseUrl not found in server config")
return base_url
def url_for(self, entity_urn: Union[str, Urn]) -> str:
    """Return the link to an entity's page in the DataHub UI.

    Note: Only supported with DataHub Cloud.

    Args:
        entity_urn: Urn of the entity to link to, given either as a string
            or as a Urn object.

    Returns:
        The public-facing url for the entity.
    """
    # Resolve the base url first, then reduce the urn to its string form
    # before handing both to the shared link builder.
    base_url = self.frontend_base_url
    urn_text = str(entity_urn)
    return make_url_for_urn(base_url, urn_text)
@classmethod
def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
session_config = emitter._session_config

View File

@ -0,0 +1,53 @@
from typing import Optional
import datahub.metadata.urns as urns
from datahub.utilities.urns.urn import guess_entity_type
# Maps an entity type to the URL path segment that make_url_for_urn places
# in front of the urn. Entity types missing from this table fall back to
# using the entity type itself as the path segment.
_url_prefixes = {
    # Atypical mappings.
    urns.DataJobUrn.ENTITY_TYPE: "tasks",
    urns.DataFlowUrn.ENTITY_TYPE: "pipelines",
    urns.CorpUserUrn.ENTITY_TYPE: "user",
    urns.CorpGroupUrn.ENTITY_TYPE: "group",
    # Normal mappings - matches the entity type.
    urns.ChartUrn.ENTITY_TYPE: "chart",
    urns.ContainerUrn.ENTITY_TYPE: "container",
    urns.DataProductUrn.ENTITY_TYPE: "dataProduct",
    urns.DatasetUrn.ENTITY_TYPE: "dataset",
    urns.DashboardUrn.ENTITY_TYPE: "dashboard",
    urns.DomainUrn.ENTITY_TYPE: "domain",
    urns.GlossaryNodeUrn.ENTITY_TYPE: "glossaryNode",
    urns.GlossaryTermUrn.ENTITY_TYPE: "glossaryTerm",
    urns.TagUrn.ENTITY_TYPE: "tag",
}
def make_url_for_urn(
    frontend_base_url: str,
    entity_urn: str,
    *,
    tab: Optional[str] = None,
) -> str:
    """Build the public-facing URL for an entity urn.

    Args:
        frontend_base_url: The public-facing base url of the frontend. Trailing
            slashes are ignored, so the result never contains a double slash
            after the host.
        entity_urn: The urn of the entity to get the url for.
        tab: The tab to deep link into. If not provided, the default tab for the entity will be shown.

    Returns:
        The public-facing url for the entity. When no tab is given, the url
        ends with a trailing slash.

    Examples:
        >>> make_url_for_urn("https://demo.datahub.com", "urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992", tab="Contents")
        'https://demo.datahub.com/container/urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992/Contents'
        >>> make_url_for_urn("https://demo.datahub.com", "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)")
        'https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)/'
        >>> make_url_for_urn("https://demo.datahub.com/", "urn:li:tag:pii")
        'https://demo.datahub.com/tag/urn:li:tag:pii/'
    """
    entity_type = guess_entity_type(entity_urn)

    # Entity types without an explicit mapping use the entity type itself as
    # the path segment.
    url_prefix = _url_prefixes.get(entity_type, entity_type)

    # Normalize the base so "https://host/" and "https://host" yield the same link.
    base_url = frontend_base_url.rstrip("/")
    url = f"{base_url}/{url_prefix}/{entity_urn}/"

    # The tab, when requested, is appended directly after the trailing slash.
    return f"{url}{tab}" if tab else url

View File

@ -0,0 +1,6 @@
import datahub.ingestion.graph.links as links
from datahub.testing.doctest import assert_doctest
def test_links() -> None:
    """Check the docstring examples of the links module via assert_doctest."""
    assert_doctest(links)