diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f437e175af..ee28c2ead2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -147,14 +147,14 @@ great_expectations_lib = { "acryl-great-expectations==0.15.50.1", } -sql_common_slim = { +sqlalchemy_lib = { # Required for all SQL sources. # This is temporary lower bound that we're open to loosening/tightening as requirements show up "sqlalchemy>=1.4.39, <2", } sql_common = ( { - *sql_common_slim, + *sqlalchemy_lib, # Required for SQL profiling. *great_expectations_lib, "pydantic<2", # keeping this for now, but can be removed eventually @@ -551,7 +551,7 @@ plugins: Dict[str, Set[str]] = { "unity-catalog": databricks | sql_common, # databricks is alias for unity-catalog and needs to be kept in sync "databricks": databricks | sql_common, - "fivetran": snowflake_common | bigquery_common | sqlglot_lib, + "fivetran": snowflake_common | bigquery_common | sqlalchemy_lib | sqlglot_lib, "qlik-sense": sqlglot_lib | {"requests", "websocket-client"}, "sigma": sqlglot_lib | {"requests"}, "sac": sac, diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 24cb8fe1a6..043d61c6df 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -54,6 +54,7 @@ from datahub.ingestion.graph.filters import ( RemovedStatusFilter, generate_filter, ) +from datahub.ingestion.graph.links import make_url_for_urn from datahub.ingestion.source.state.checkpoint import Checkpoint from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( MetadataChangeEvent, @@ -187,6 +188,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI): """Get the public-facing base url of the frontend This url can be used to construct links to the frontend. The url will not include a trailing slash. + Note: Only supported with DataHub Cloud. 
""" @@ -198,6 +200,20 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI): raise ValueError("baseUrl not found in server config") return base_url + def url_for(self, entity_urn: Union[str, Urn]) -> str: + """Get the UI url for an entity. + + Note: Only supported with DataHub Cloud. + + Args: + entity_urn: The urn of the entity to get the url for. + + Returns: + The public-facing url for the entity. + """ + + return make_url_for_urn(self.frontend_base_url, str(entity_urn)) + @classmethod def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph": session_config = emitter._session_config diff --git a/metadata-ingestion/src/datahub/ingestion/graph/links.py b/metadata-ingestion/src/datahub/ingestion/graph/links.py new file mode 100644 index 0000000000..2334343da1 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/graph/links.py @@ -0,0 +1,53 @@ +from typing import Optional + +import datahub.metadata.urns as urns +from datahub.utilities.urns.urn import guess_entity_type + +_url_prefixes = { + # Atypical mappings. + urns.DataJobUrn.ENTITY_TYPE: "tasks", + urns.DataFlowUrn.ENTITY_TYPE: "pipelines", + urns.CorpUserUrn.ENTITY_TYPE: "user", + urns.CorpGroupUrn.ENTITY_TYPE: "group", + # Normal mappings - matches the entity type. + urns.ChartUrn.ENTITY_TYPE: "chart", + urns.ContainerUrn.ENTITY_TYPE: "container", + urns.DataProductUrn.ENTITY_TYPE: "dataProduct", + urns.DatasetUrn.ENTITY_TYPE: "dataset", + urns.DashboardUrn.ENTITY_TYPE: "dashboard", + urns.DomainUrn.ENTITY_TYPE: "domain", + urns.GlossaryNodeUrn.ENTITY_TYPE: "glossaryNode", + urns.GlossaryTermUrn.ENTITY_TYPE: "glossaryTerm", + urns.TagUrn.ENTITY_TYPE: "tag", +} + + +def make_url_for_urn( + frontend_base_url: str, + entity_urn: str, + *, + tab: Optional[str] = None, +) -> str: + """Build the public-facing URL for an entity urn. + + Args: + frontend_base_url: The public-facing base url of the frontend. + entity_urn: The urn of the entity to get the url for. 
+ tab: The tab to deep link into. If not provided, the default tab for the entity will be shown. + + Returns: + The public-facing url for the entity. + + Examples: + >>> make_url_for_urn("https://demo.datahub.com", "urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992", tab="Contents") + 'https://demo.datahub.com/container/urn:li:container:b41c14bc5cb3ccfbb0433c8cbdef2992/Contents' + >>> make_url_for_urn("https://demo.datahub.com", "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)") + 'https://demo.datahub.com/dataset/urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.actuating,PROD)/' + """ + entity_type = guess_entity_type(entity_urn) + + url_prefix = _url_prefixes.get(entity_type, entity_type) + url = f"{frontend_base_url}/{url_prefix}/{entity_urn}/" + if tab: + url += f"{tab}" + return url diff --git a/metadata-ingestion/tests/unit/sdk/test_links.py b/metadata-ingestion/tests/unit/sdk/test_links.py new file mode 100644 index 0000000000..e6b187db8b --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk/test_links.py @@ -0,0 +1,6 @@ +import datahub.ingestion.graph.links as links +from datahub.testing.doctest import assert_doctest + + +def test_links() -> None: + assert_doctest(links)