diff --git a/metadata-ingestion/source_docs/looker.md b/metadata-ingestion/source_docs/looker.md index 99365aaad2..8b36a21d35 100644 --- a/metadata-ingestion/source_docs/looker.md +++ b/metadata-ingestion/source_docs/looker.md @@ -72,7 +72,8 @@ Note that a `.` is used to denote nested fields in the YAML recipe. | ------------------------- | -------- | ----------------------- | ------------------------------------------------------------------------------------------------------------ | | `client_id` | ✅ | | Looker API3 client ID. | | `client_secret` | ✅ | | Looker API3 client secret. | -| `base_url` | ✅ | | Url to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. | +| `base_url` | ✅ | | URL to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. Used for making API calls to Looker and, unless `external_base_url` is set, for constructing clickable dashboard and chart URLs. | +| `external_base_url` | | value of `base_url` | Optional URL to use when constructing external URLs to Looker if the `base_url` is not the correct one to use. For example, `https://looker-public.company.com`. If not provided, the external base URL will default to `base_url`. | | `platform_name` | | `"looker"` | Platform to use in namespace when constructing URNs. | | `extract_owners` | | `True` | When enabled, extracts ownership from Looker directly. When disabled, ownership is left empty for dashboards and charts. 
| | `strip_user_ids_from_email` | | `False` | When enabled, converts Looker user emails of the form name@domain.com to urn:li:corpuser:name when assigning ownership | diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker.py b/metadata-ingestion/src/datahub/ingestion/source/looker.py index ec7f3d7473..e2001ab8fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker.py @@ -29,6 +29,7 @@ from looker_sdk.sdk.api31.models import ( Query, User, ) +from pydantic import validator import datahub.emitter.mce_builder as builder from datahub.configuration import ConfigModel @@ -102,6 +103,13 @@ class LookerDashboardSourceConfig(LookerAPIConfig, LookerCommonConfig): strip_user_ids_from_email: bool = False skip_personal_folders: bool = False max_threads: int = os.cpu_count() or 40 + external_base_url: Optional[str] + + @validator("external_base_url", pre=True, always=True) + def external_url_defaults_to_api_config_base_url( + cls, v: Optional[str], *, values: Dict[str, Any], **kwargs: Dict[str, Any] + ) -> str: + return v or values["base_url"] @dataclass @@ -526,13 +534,12 @@ class LookerDashboardSource(Source): ) chart_type = self._get_chart_type(dashboard_element) - chart_info = ChartInfoClass( type=chart_type, description=dashboard_element.description or "", title=dashboard_element.title or "", lastModified=ChangeAuditStamps(), - chartUrl=dashboard_element.url(self.source_config.base_url), + chartUrl=dashboard_element.url(self.source_config.external_base_url or ""), inputs=dashboard_element.get_view_urns(self.source_config), customProperties={ "upstream_fields": ",".join( @@ -618,7 +625,7 @@ class LookerDashboardSource(Source): title=looker_dashboard.title, charts=[mce.proposedSnapshot.urn for mce in chart_mces], lastModified=ChangeAuditStamps(), - dashboardUrl=looker_dashboard.url(self.source_config.base_url), + dashboardUrl=looker_dashboard.url(self.source_config.external_base_url), 
) dashboard_snapshot.aspects.append(dashboard_info)