diff --git a/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py index 103f7b83c95..cb5f4a71842 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py @@ -61,6 +61,9 @@ from metadata.generated.schema.entity.services.dashboardService import ( from metadata.generated.schema.metadataIngestion.workflow import ( Source as WorkflowSource, ) +from metadata.generated.schema.security.credentials.githubCredentials import ( + GitHubCredentials, +) from metadata.generated.schema.type.entityLineage import EntitiesEdge from metadata.generated.schema.type.entityReference import EntityReference from metadata.generated.schema.type.usageRequest import UsageRequest @@ -151,30 +154,51 @@ class LookerSource(DashboardServiceSource): @property def parser(self) -> Optional[LkmlParser]: - if not self._parser and self.service_connection.githubCredentials: - self._parser = LkmlParser( - reader=GitHubReader(self.service_connection.githubCredentials) - ) + if not self._parser and self.github_credentials: + self._parser = LkmlParser(reader=GitHubReader(self.github_credentials)) return self._parser + @property + def github_credentials(self) -> Optional[GitHubCredentials]: + """ + Check if the credentials are informed and return them. 
+ + We either get GitHubCredentials or `NoGitHubCredentials` + """ + if self.service_connection.githubCredentials and isinstance( + self.service_connection.githubCredentials, GitHubCredentials + ): + return self.service_connection.githubCredentials + return None + def list_datamodels(self) -> Iterable[LookmlModelExplore]: """ Fetch explores with the SDK """ - # First, pick up all the LookML Models - all_lookml_models: Sequence[LookmlModel] = self.client.all_lookml_models() + if self.source_config.includeDataModels: + # First, pick up all the LookML Models + all_lookml_models: Sequence[LookmlModel] = self.client.all_lookml_models() - # Then, fetch the explores for each of them - for lookml_model in all_lookml_models: - # Each LookML model have a list of explores we'll be ingesting - for explore_nav in ( - cast(Sequence[LookmlModelNavExplore], lookml_model.explores) or [] - ): - explore = self.client.lookml_model_explore( - lookml_model_name=lookml_model.name, explore_name=explore_nav.name - ) - yield explore + # Then, fetch the explores for each of them + for lookml_model in all_lookml_models: + # Each LookML model have a list of explores we'll be ingesting + for explore_nav in ( + cast(Sequence[LookmlModelNavExplore], lookml_model.explores) or [] + ): + if filter_by_datamodel( + self.source_config.dataModelFilterPattern, lookml_model.name + ): + self.status.filter( + lookml_model.name, "Data model (Explore) filtered out." 
+ ) + continue + + explore = self.client.lookml_model_explore( + lookml_model_name=lookml_model.name, + explore_name=explore_nav.name, + ) + yield explore def yield_bulk_datamodel( self, model: LookmlModelExplore @@ -183,55 +207,63 @@ class LookerSource(DashboardServiceSource): Get the Explore and View information and prepare the model creation request """ - if self.source_config.includeDataModels: - try: - datamodel_name = build_datamodel_name(model.model_name, model.name) - if filter_by_datamodel( - self.source_config.dataModelFilterPattern, datamodel_name - ): - self.status.filter(datamodel_name, "Data model filtered out.") - else: - explore_datamodel = CreateDashboardDataModelRequest( - name=datamodel_name, - displayName=model.name, - description=model.description, - service=self.context.dashboard_service.fullyQualifiedName.__root__, - dataModelType=DataModelType.LookMlExplore.value, - serviceType=DashboardServiceType.Looker.value, - columns=get_columns_from_model(model), - sql=self._get_explore_sql(model), - ) - yield explore_datamodel - self.status.scanned(f"Data Model Scanned: {model.name}") + try: + datamodel_name = build_datamodel_name(model.model_name, model.name) + if filter_by_datamodel( + self.source_config.dataModelFilterPattern, datamodel_name + ): + self.status.filter(datamodel_name, "Data model filtered out.") + else: + explore_datamodel = CreateDashboardDataModelRequest( + name=datamodel_name, + displayName=model.name, + description=model.description, + service=self.context.dashboard_service.fullyQualifiedName.__root__, + dataModelType=DataModelType.LookMlExplore.value, + serviceType=DashboardServiceType.Looker.value, + columns=get_columns_from_model(model), + sql=self._get_explore_sql(model), + ) + yield explore_datamodel + self.status.scanned(f"Data Model Scanned: {model.name}") - # Maybe use the project_name as key too? 
- # Save the explores for when we create the lineage with the dashboards and views - self._explores_cache[ - explore_datamodel.name.__root__ - ] = self.context.dataModel # This is the newly created explore + # Maybe use the project_name as key too? + # Save the explores for when we create the lineage with the dashboards and views + self._explores_cache[ + explore_datamodel.name.__root__ + ] = self.context.dataModel # This is the newly created explore - # We can get VIEWs from the JOINs to know the dependencies - # We will only try and fetch if we have the credentials - if self.service_connection.githubCredentials: - for view in model.joins: - yield from self._process_view( - view_name=ViewName(view.name), explore=model + # We can get VIEWs from the JOINs to know the dependencies + # We will only try and fetch if we have the credentials + if self.github_credentials: + for view in model.joins: + + if filter_by_datamodel( + self.source_config.dataModelFilterPattern, view.name + ): + self.status.filter( + view.name, "Data model (View) filtered out." 
) + continue - except ValidationError as err: - error = f"Validation error yielding Data Model [{model.name}]: {err}" - logger.debug(traceback.format_exc()) - logger.error(error) - self.status.failed( - name=model.name, error=error, stack_trace=traceback.format_exc() - ) - except Exception as err: - error = f"Wild error yielding Data Model [{model.name}]: {err}" - logger.debug(traceback.format_exc()) - logger.error(error) - self.status.failed( - name=model.name, error=error, stack_trace=traceback.format_exc() - ) + yield from self._process_view( + view_name=ViewName(view.name), explore=model + ) + + except ValidationError as err: + error = f"Validation error yielding Data Model [{model.name}]: {err}" + logger.debug(traceback.format_exc()) + logger.error(error) + self.status.failed( + name=model.name, error=error, stack_trace=traceback.format_exc() + ) + except Exception as err: + error = f"Wild error yielding Data Model [{model.name}]: {err}" + logger.debug(traceback.format_exc()) + logger.error(error) + self.status.failed( + name=model.name, error=error, stack_trace=traceback.format_exc() + ) def _get_explore_sql(self, explore: LookmlModelExplore) -> Optional[str]: """ @@ -239,7 +271,7 @@ class LookerSource(DashboardServiceSource): file definition and add it here """ # Only look to parse if creds are in - if self.service_connection.githubCredentials: + if self.github_credentials: try: # This will only parse if the file has not been parsed yet self.parser.parse_file(Includes(explore.source_file)) @@ -379,7 +411,6 @@ class LookerSource(DashboardServiceSource): """ Method to Get Dashboard Entity """ - dashboard_request = CreateDashboardRequest( name=clean_dashboard_name(dashboard_details.id), displayName=dashboard_details.title, diff --git a/ingestion/src/metadata/utils/filters.py b/ingestion/src/metadata/utils/filters.py index 0842bda9c1d..f0fd98de69a 100644 --- a/ingestion/src/metadata/utils/filters.py +++ b/ingestion/src/metadata/utils/filters.py @@ -229,7 +229,7 
@@ def filter_by_datamodel( datamodel_filter_pattern: Optional[FilterPattern], datamodel_name: str ) -> bool: """ - Return True if the chart needs to be filtered, False otherwise + Return True if the data model needs to be filtered, False otherwise Include takes precedence over exclude diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json index 411148ee8bf..68779d57dd7 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/lookerConnection.json @@ -11,6 +11,12 @@ "type": "string", "enum": ["Looker"], "default": "Looker" + }, + "noGitHubCredentials": { + "title": "No GitHub Credentials", + "description": "Do not set any credentials. Note that credentials are required to extract .lkml views and their lineage.", + "type": "object", + "additionalProperties": false } }, "properties": { @@ -41,7 +47,14 @@ "githubCredentials": { "title": "GitHub Credentials", "description": "Credentials to extract the .lkml files from a repository. This is required to get all the lineage and definitions.", - "$ref": "../../../../security/credentials/githubCredentials.json" + "oneOf": [ + { + "$ref": "#/definitions/noGitHubCredentials" + }, + { + "$ref": "../../../../security/credentials/githubCredentials.json" + } + ] }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction",