From 939adf887b92a3ede8f974c9daeccef74e99f548 Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Thu, 19 Jan 2023 21:09:43 +0530 Subject: [PATCH] Fix #8973: Superset fetch metadata from DB (#9645) * Docs #8973: Superset with SSO docs * Superset support for mysql & postgres db * remove unwanted field from yaml * Fix pylint * Fix superset tests * Fix sample data connection * ci fix & review comments * cypress & defualt provider fix --- .../v008__create_db_connection_info.sql | 13 + .../v008__create_db_connection_info.sql | 13 +- .../sample_data/dashboards/service.json | 7 +- .../metadata/examples/workflows/superset.yaml | 16 +- .../source/dashboard/superset/api_source.py | 181 ++++++++++++ .../source/dashboard/superset/client.py | 6 +- .../source/dashboard/superset/connection.py | 39 ++- .../source/dashboard/superset/db_source.py | 188 +++++++++++++ .../source/dashboard/superset/metadata.py | 259 +----------------- .../source/dashboard/superset/mixin.py | 91 ++++++ .../source/dashboard/superset/queries.py | 43 +++ .../connectors/dashboard/superset/sso.md | 14 + openmetadata-docs/content/menu.md | 2 + .../service/resources/EntityResourceTest.java | 2 +- .../resources/charts/ChartResourceTest.java | 4 +- .../dashboards/DashboardResourceTest.java | 6 +- .../DashboardServiceResourceTest.java | 65 +++-- .../IngestionPipelineResourceTest.java | 4 +- .../openmetadata/service/util/TestUtils.java | 10 +- .../dashboard/supersetConnection.json | 32 +-- .../entity/utils/supersetApiConnection.json | 37 +++ .../resources/ui/src/utils/ServiceUtils.tsx | 10 +- 22 files changed, 710 insertions(+), 332 deletions(-) create mode 100644 ingestion/src/metadata/ingestion/source/dashboard/superset/api_source.py create mode 100644 ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py create mode 100644 ingestion/src/metadata/ingestion/source/dashboard/superset/mixin.py create mode 100644 ingestion/src/metadata/ingestion/source/dashboard/superset/queries.py create mode 100644 openmetadata-docs/content/connectors/dashboard/superset/sso.md create mode 100644 openmetadata-spec/src/main/resources/json/schema/entity/utils/supersetApiConnection.json diff --git a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql index 3ef38c58158..f2ff64834c9 100644 --- a/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql +++ b/bootstrap/sql/com.mysql.cj.jdbc.Driver/v008__create_db_connection_info.sql @@ -50,3 +50,16 @@ SET json = JSON_INSERT( REPLACE(JSON_UNQUOTE(JSON_EXTRACT(json, '$.fullyQualifiedName')),':','') ) WHERE JSON_EXTRACT(json, '$.serviceType') = 'Dagster'; + + +UPDATE dashboard_service_entity +SET json = JSON_INSERT( +JSON_REMOVE(json,'$.connection.config.username','$.connection.config.password','$.connection.config.provider'), +'$.connection.config.connection', +JSON_OBJECT( + 'username',JSON_EXTRACT(json,'$.connection.config.username'), + 'password',JSON_EXTRACT(json,'$.connection.config.password'), + 'provider',JSON_EXTRACT(json,'$.connection.config.provider') + ) +) +WHERE serviceType = 'Superset'; diff --git a/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql b/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql index 4aa2babbcb6..1a27c9e48ec 100644 --- a/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql +++ b/bootstrap/sql/org.postgresql.Driver/v008__create_db_connection_info.sql @@ -47,4 +47,15 @@ SET json = jsonb_set( '{fullyQualifiedName}', to_jsonb(replace(json ->> 'fullyQualifiedName',':','')) ) -WHERE json ->> 'serviceType' = 'Dagster'; \ No newline at end of file +WHERE json ->> 'serviceType' = 'Dagster'; + +UPDATE dashboard_service_entity +SET json = JSONB_SET(json::jsonb, +'{connection,config}',json::jsonb #>'{connection,config}' #- '{password}' #- '{username}' #- '{provider}'|| +jsonb_build_object('connection',jsonb_build_object( +'username',json #>'{connection,config,username}', +'password',json #>'{connection,config,password}', +'provider',json #>'{connection,config,provider}' +)), true) +where servicetype = 'Superset'; + diff --git a/ingestion/examples/sample_data/dashboards/service.json b/ingestion/examples/sample_data/dashboards/service.json index 7559df3a4bb..8eb4ba6854f 100644 --- a/ingestion/examples/sample_data/dashboards/service.json +++ b/ingestion/examples/sample_data/dashboards/service.json @@ -5,8 +5,11 @@ "config": { "type": "Superset", "hostPort": "http://localhost:8088", - "username": "admin", - "password": "admin" + "connection":{ + "username": "admin", + "password": "admin", + "provider": "db" + } } }, "sourceConfig": { diff --git a/ingestion/src/metadata/examples/workflows/superset.yaml b/ingestion/src/metadata/examples/workflows/superset.yaml index f7aa982e7ff..19fe06b376d 100644 --- a/ingestion/src/metadata/examples/workflows/superset.yaml +++ b/ingestion/src/metadata/examples/workflows/superset.yaml @@ -1,16 +1,24 @@ source: type: superset - serviceName: local_superset + serviceName: local_superset_12 serviceConnection: config: - hostPort: http://localhost:8080 - username: admin - password: admin + hostPort: http://localhost:8088 + connection: + type: Postgres + username: superset + password: superset + hostPort: localhost:5432 + database: superset + # username: admin + # password: admin + # provider: db type: Superset sourceConfig: config: chartFilterPattern: {} dashboardFilterPattern: {} + type: DashboardMetadata sink: type: metadata-rest config: {} diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/api_source.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/api_source.py new file mode 100644 index 00000000000..370426f5a64 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/api_source.py @@ -0,0 +1,181 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Superset source module +""" + +import traceback +from typing import Iterable, List, Optional + +from metadata.generated.schema.api.data.createChart import CreateChartRequest +from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest +from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest +from metadata.generated.schema.entity.data.chart import ChartType +from metadata.generated.schema.entity.data.dashboard import ( + Dashboard as Lineage_Dashboard, +) +from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.type.entityReference import EntityReference +from metadata.ingestion.source.dashboard.superset.mixin import SupersetSourceMixin +from metadata.utils import fqn +from metadata.utils.helpers import get_standard_chart_type +from metadata.utils.logger import ingestion_logger + +logger = ingestion_logger() + + +class SupersetAPISource(SupersetSourceMixin): + """ + Superset API Source Class + """ + + def prepare(self): + """ + Fetching all charts available in superset + this step is done because fetch_total_charts api fetches all + the required information which is not available in fetch_charts_with_id api + """ + current_page = 0 + page_size = 25 + total_charts = self.client.fetch_total_charts() + while current_page * page_size <= total_charts: + charts = self.client.fetch_charts(current_page, page_size) + current_page += 1 + for index in range(len(charts["result"])): + self.all_charts[charts["ids"][index]] = charts["result"][index] + + def get_dashboards_list(self) -> Optional[List[object]]: + """ + Get List of all dashboards + """ + current_page = 0 + page_size = 25 + total_dashboards = self.client.fetch_total_dashboards() + while current_page * page_size <= total_dashboards: + dashboards = self.client.fetch_dashboards(current_page, page_size) + current_page += 1 + for dashboard in dashboards["result"]: + yield dashboard + + def yield_dashboard( + self, dashboard_details: dict + ) -> Iterable[CreateDashboardRequest]: + """ + Method to Get Dashboard Entity + """ + yield CreateDashboardRequest( + name=dashboard_details["id"], + displayName=dashboard_details["dashboard_title"], + description="", + dashboardUrl=dashboard_details["url"], + owner=self.get_owner_details(dashboard_details), + charts=[ + EntityReference(id=chart.id.__root__, type="chart") + for chart in self.context.charts + ], + service=EntityReference( + id=self.context.dashboard_service.id.__root__, type="dashboardService" + ), + ) + + def yield_dashboard_lineage_details( + self, dashboard_details: dict, db_service_name: str + ) -> Optional[Iterable[AddLineageRequest]]: + """ + Get lineage between dashboard and data sources + """ + for chart_id in self._get_charts_of_dashboard(dashboard_details): + chart_json = self.all_charts.get(chart_id) + if chart_json: + datasource_fqn = ( + self._get_datasource_fqn( + chart_json.get("datasource_id"), db_service_name + ) + if chart_json.get("datasource_id") + else None + ) + if not datasource_fqn: + continue + from_entity = self.metadata.get_by_name( + entity=Table, + fqn=datasource_fqn, + ) + try: + dashboard_fqn = fqn.build( + self.metadata, + entity_type=Lineage_Dashboard, + service_name=self.config.serviceName, + dashboard_name=str(dashboard_details["id"]), + ) + to_entity = self.metadata.get_by_name( + entity=Lineage_Dashboard, + fqn=dashboard_fqn, + ) + if from_entity and to_entity: + yield self._get_add_lineage_request( + to_entity=to_entity, from_entity=from_entity + ) + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.error( + f"Error to yield dashboard lineage details for DB service name [{db_service_name}]: {exc}" + ) + + def yield_dashboard_chart( + self, dashboard_details: dict + ) -> Optional[Iterable[CreateChartRequest]]: + """ + Metod to fetch charts linked to dashboard + """ + for chart_id in self._get_charts_of_dashboard(dashboard_details): + chart_json = self.all_charts.get(chart_id) + if not chart_json: + logger.warning(f"chart details for id: {chart_id} not found, skipped") + continue + chart = CreateChartRequest( + name=chart_json["id"], + displayName=chart_json.get("slice_name"), + description="", + chartType=get_standard_chart_type( + chart_json.get("viz_type", ChartType.Other.value) + ), + chartUrl=chart_json.get("url"), + service=EntityReference( + id=self.context.dashboard_service.id.__root__, + type="dashboardService", + ), + ) + yield chart + + def _get_datasource_fqn( + self, datasource_id: str, db_service_name: str + ) -> Optional[str]: + if db_service_name: + try: + datasource_json = self.client.fetch_datasource(datasource_id) + database_json = self.client.fetch_database( + datasource_json["result"]["database"]["id"] + ) + dataset_fqn = fqn.build( + self.metadata, + entity_type=Table, + table_name=datasource_json["result"]["table_name"], + schema_name=datasource_json["result"]["schema"], + database_name=database_json["result"]["parameters"]["database"], + service_name=db_service_name, + ) + return dataset_fqn + except KeyError as err: + logger.debug(traceback.format_exc()) + logger.warning( + f"Failed to fetch Datasource with id [{datasource_id}]: {err}" + ) + return None diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/client.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/client.py index 7d5c62b7d2d..09644e68213 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/superset/client.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/client.py @@ -55,10 +55,10 @@ class SupersetAuthenticationProvider(AuthenticationProvider): def _login_request(self) -> str: auth_request = { - "username": self.service_connection.username, - "password": self.service_connection.password.get_secret_value(), + "username": self.service_connection.connection.username, + "password": self.service_connection.connection.password.get_secret_value(), "refresh": True, - "provider": self.service_connection.provider, + "provider": self.service_connection.connection.provider.value, } return json.dumps(auth_request) diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py index 2bdc928fe1f..ba3f510a9c7 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/connection.py @@ -12,26 +12,57 @@ """ Source connection handler """ +from typing import Union + +from sqlalchemy.engine import Engine + from metadata.generated.schema.entity.services.connections.dashboard.supersetConnection import ( SupersetConnection, ) -from metadata.ingestion.connections.test_connections import SourceConnectionException +from metadata.generated.schema.entity.services.connections.database.mysqlConnection import ( + MysqlConnection, +) +from metadata.generated.schema.entity.services.connections.database.postgresConnection import ( + PostgresConnection, +) +from metadata.generated.schema.entity.utils.supersetApiConnection import ( + SupersetAPIConnection, +) +from metadata.ingestion.connections.test_connections import ( + SourceConnectionException, + test_connection_db_common, +) from metadata.ingestion.source.dashboard.superset.client import SupersetAPIClient +from metadata.ingestion.source.database.mysql.connection import ( + get_connection as mysql_get_connection, +) +from metadata.ingestion.source.database.postgres.connection import ( + get_connection as pg_get_connection, +) def get_connection(connection: SupersetConnection) -> SupersetAPIClient: """ Create connection """ - return SupersetAPIClient(connection) + if isinstance(connection.connection, SupersetAPIConnection): + return SupersetAPIClient(connection) + if isinstance(connection.connection, PostgresConnection): + return pg_get_connection(connection=connection.connection) + if isinstance(connection.connection, MysqlConnection): + return mysql_get_connection(connection=connection.connection) + return None -def test_connection(client: SupersetAPIClient) -> None: +def test_connection(client: Union[SupersetAPIClient, Engine]) -> None: """ Test connection """ try: - client.fetch_menu() + if isinstance(client, SupersetAPIClient): + client.fetch_menu() + else: + test_connection_db_common(client) except Exception as exc: msg = f"Unknown error connecting with {client}: {exc}." raise SourceConnectionException(msg) diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py new file mode 100644 index 00000000000..85b4fe6db5f --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/db_source.py @@ -0,0 +1,188 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Superset source module +""" + +import traceback +from typing import Iterable, List, Optional + +from sqlalchemy.engine import Engine + +from metadata.generated.schema.api.data.createChart import CreateChartRequest +from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest +from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest +from metadata.generated.schema.entity.data.chart import ChartType +from metadata.generated.schema.entity.data.dashboard import ( + Dashboard as Lineage_Dashboard, +) +from metadata.generated.schema.entity.data.table import Table +from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + OpenMetadataConnection, +) +from metadata.generated.schema.metadataIngestion.workflow import ( + Source as WorkflowSource, +) +from metadata.generated.schema.type.entityReference import EntityReference +from metadata.ingestion.source.dashboard.superset.mixin import SupersetSourceMixin +from metadata.ingestion.source.dashboard.superset.queries import ( + FETCH_ALL_CHARTS, + FETCH_DASHBOARDS, +) +from metadata.utils import fqn +from metadata.utils.helpers import get_standard_chart_type +from metadata.utils.logger import ingestion_logger + +logger = ingestion_logger() + + +class SupersetDBSource(SupersetSourceMixin): + """ + Superset DB Source Class + """ + + def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection): + super().__init__(config, metadata_config) + self.engine: Engine = self.client + + def prepare(self): + """ + Fetching all charts available in superset + this step is done because fetch_total_charts api fetches all + the required information which is not available in fetch_charts_with_id api + """ + charts = self.engine.execute(FETCH_ALL_CHARTS) + for chart in charts: + self.all_charts[chart.id] = dict(chart) + + def get_dashboards_list(self) -> Optional[List[object]]: + """ + Get List of all dashboards + """ + dashboards = self.engine.execute(FETCH_DASHBOARDS) + for dashboard in dashboards: + yield dict(dashboard) + + def yield_dashboard( + self, dashboard_details: dict + ) -> Iterable[CreateDashboardRequest]: + """ + Method to Get Dashboard Entity + """ + yield CreateDashboardRequest( + name=dashboard_details["id"], + displayName=dashboard_details["dashboard_title"], + description="", + dashboardUrl=f"/superset/dashboard/{dashboard_details['id']}", + owner=self.get_owner_details(dashboard_details), + charts=[ + EntityReference(id=chart.id.__root__, type="chart") + for chart in self.context.charts + ], + service=EntityReference( + id=self.context.dashboard_service.id.__root__, type="dashboardService" + ), + ) + + def yield_dashboard_lineage_details( + self, dashboard_details: dict, db_service_name: str + ) -> Optional[Iterable[AddLineageRequest]]: + """ + Get lineage between dashboard and data sources + """ + for chart_id in self._get_charts_of_dashboard(dashboard_details): + chart_json = self.all_charts.get(chart_id) + if chart_json: + datasource_fqn = ( + self._get_datasource_fqn(chart_json, db_service_name) + if chart_json.get("table_name") + else None + ) + if not datasource_fqn: + continue + from_entity = self.metadata.get_by_name( + entity=Table, + fqn=datasource_fqn, + ) + try: + dashboard_fqn = fqn.build( + self.metadata, + entity_type=Lineage_Dashboard, + service_name=self.config.serviceName, + dashboard_name=str(dashboard_details["id"]), + ) + to_entity = self.metadata.get_by_name( + entity=Lineage_Dashboard, + fqn=dashboard_fqn, + ) + if from_entity and to_entity: + yield self._get_add_lineage_request( + to_entity=to_entity, from_entity=from_entity + ) + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.error( + f"Error to yield dashboard lineage details for DB service name [{db_service_name}]: {exc}" + ) + + def yield_dashboard_chart( + self, dashboard_details: dict + ) -> Optional[Iterable[CreateChartRequest]]: + """ + Metod to fetch charts linked to dashboard + """ + for chart_id in self._get_charts_of_dashboard(dashboard_details): + chart_json = self.all_charts.get(chart_id) + if not chart_json: + logger.warning(f"chart details for id: {chart_id} not found, skipped") + continue + chart = CreateChartRequest( + name=chart_json["id"], + displayName=chart_json.get("slice_name"), + description="", + chartType=get_standard_chart_type( + chart_json.get("viz_type", ChartType.Other.value) + ), + chartUrl=f"/explore/?slice_id={chart_json['id']}", + service=EntityReference( + id=self.context.dashboard_service.id.__root__, + type="dashboardService", + ), + ) + yield chart + + def _get_database_name(self, sqa_str: str) -> str: + if sqa_str: + return sqa_str.split("/")[-1] + return None + + def _get_datasource_fqn( + self, chart_json: dict, db_service_name: str + ) -> Optional[str]: + if db_service_name: + try: + dataset_fqn = fqn.build( + self.metadata, + entity_type=Table, + table_name=chart_json.get("table_name"), + database_name=self._get_database_name( + chart_json.get("sqlalchemy_uri") + ), + schema_name=chart_json.get("schema"), + service_name=db_service_name, + ) + return dataset_fqn + except KeyError as err: + logger.debug(traceback.format_exc()) + logger.warning( + f"Failed to fetch Datasource with id [{chart_json.get('table_name')}]: {err}" + ) + return None diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/metadata.py index a1593eb8023..cd893f6cbd2 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/superset/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/metadata.py @@ -11,94 +11,28 @@ """ Superset source module """ - -import json -import traceback -from typing import Iterable, List, Optional - -from metadata.generated.schema.api.data.createChart import CreateChartRequest -from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest -from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest -from metadata.generated.schema.entity.data.chart import ChartType -from metadata.generated.schema.entity.data.dashboard import ( - Dashboard as Lineage_Dashboard, -) -from metadata.generated.schema.entity.data.table import Table from metadata.generated.schema.entity.services.connections.dashboard.supersetConnection import ( SupersetConnection, ) from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( OpenMetadataConnection, ) -from metadata.generated.schema.entity.services.dashboardService import ( - DashboardServiceType, +from metadata.generated.schema.entity.utils.supersetApiConnection import ( + SupersetAPIConnection, ) from metadata.generated.schema.metadataIngestion.workflow import ( Source as WorkflowSource, ) -from metadata.generated.schema.type.entityReference import EntityReference -from metadata.ingestion.api.source import InvalidSourceException, SourceStatus -from metadata.ingestion.source.dashboard.dashboard_service import DashboardServiceSource -from metadata.utils import fqn -from metadata.utils.helpers import get_standard_chart_type -from metadata.utils.logger import ingestion_logger - -logger = ingestion_logger() +from metadata.ingestion.api.source import InvalidSourceException +from metadata.ingestion.source.dashboard.superset.api_source import SupersetAPISource +from metadata.ingestion.source.dashboard.superset.db_source import SupersetDBSource -def get_metric_name(metric): - """ - Get metric name - - Args: - metric: - Returns: - """ - if not metric: - return "" - if isinstance(metric, str): - return metric - label = metric.get("label") - - return label or None - - -def get_filter_name(filter_obj): - """ - Get filter name - - Args: - filter_obj: - - Returns: - str - """ - sql_expression = filter_obj.get("sqlExpression") - if sql_expression: - return sql_expression - - clause = filter_obj.get("clause") - column = filter_obj.get("subject") - operator = filter_obj.get("operator") - comparator = filter_obj.get("comparator") - return f"{clause} {column} {operator} {comparator}" - - -class SupersetSource(DashboardServiceSource): +class SupersetSource: """ Superset Source Class """ - config: WorkflowSource - metadata_config: OpenMetadataConnection - status: SourceStatus - platform = "superset" - service_type = DashboardServiceType.Superset.value - - def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection): - self.all_charts = {} - super().__init__(config, metadata_config) - @classmethod def create(cls, config_dict: dict, metadata_config: OpenMetadataConnection): config = WorkflowSource.parse_obj(config_dict) @@ -107,181 +41,6 @@ class SupersetSource(DashboardServiceSource): raise InvalidSourceException( f"Expected SupersetConnection, but got {connection}" ) - return cls(config, metadata_config) - - def prepare(self): - """ - Fetching all charts available in superset - this step is done because fetch_total_charts api fetches all - the required information which is not available in fetch_charts_with_id api - """ - current_page = 0 - page_size = 25 - total_charts = self.client.fetch_total_charts() - while current_page * page_size <= total_charts: - charts = self.client.fetch_charts(current_page, page_size) - current_page += 1 - for index in range(len(charts["result"])): - self.all_charts[charts["ids"][index]] = charts["result"][index] - - def get_dashboards_list(self) -> Optional[List[object]]: - """ - Get List of all dashboards - """ - current_page = 0 - page_size = 25 - total_dashboards = self.client.fetch_total_dashboards() - while current_page * page_size <= total_dashboards: - dashboards = self.client.fetch_dashboards(current_page, page_size) - current_page += 1 - for dashboard in dashboards["result"]: - yield dashboard - - def get_dashboard_name(self, dashboard: dict) -> str: - """ - Get Dashboard Name - """ - return dashboard["dashboard_title"] - - def get_dashboard_details(self, dashboard: dict) -> dict: - """ - Get Dashboard Details - """ - return dashboard - - def get_owner_details(self, dashboard_details: dict) -> EntityReference: - if dashboard_details.get("owners"): - owner = dashboard_details["owners"][0] - user = self.metadata.get_user_by_email(owner.get("email")) - if user: - return EntityReference(id=user.id.__root__, type="user") - return None - - def yield_dashboard( - self, dashboard_details: dict - ) -> Iterable[CreateDashboardRequest]: - """ - Method to Get Dashboard Entity - """ - yield CreateDashboardRequest( - name=dashboard_details["id"], - displayName=dashboard_details["dashboard_title"], - description="", - dashboardUrl=dashboard_details["url"], - owner=self.get_owner_details(dashboard_details), - charts=[ - EntityReference(id=chart.id.__root__, type="chart") - for chart in self.context.charts - ], - service=EntityReference( - id=self.context.dashboard_service.id.__root__, type="dashboardService" - ), - ) - - def _get_charts_of_dashboard(self, dashboard_details: dict) -> List[str]: - """ - Method to fetch chart ids linked to dashboard - """ - raw_position_data = dashboard_details.get("position_json", {}) - if raw_position_data: - position_data = json.loads(raw_position_data) - return [ - value.get("meta", {}).get("chartId") - for key, value in position_data.items() - if key.startswith("CHART-") and value.get("meta", {}).get("chartId") - ] - return [] - - def yield_dashboard_lineage_details( - self, dashboard_details: dict, db_service_name: str - ) -> Optional[Iterable[AddLineageRequest]]: - """ - Get lineage between dashboard and data sources - """ - for chart_id in self._get_charts_of_dashboard(dashboard_details): - chart_json = self.all_charts.get(chart_id) - if chart_json: - datasource_fqn = ( - self._get_datasource_fqn( - chart_json.get("datasource_id"), db_service_name - ) - if chart_json.get("datasource_id") - else None - ) - if not datasource_fqn: - continue - from_entity = self.metadata.get_by_name( - entity=Table, - fqn=datasource_fqn, - ) - try: - dashboard_fqn = fqn.build( - self.metadata, - entity_type=Lineage_Dashboard, - service_name=self.config.serviceName, - dashboard_name=str(dashboard_details["id"]), - ) - to_entity = self.metadata.get_by_name( - entity=Lineage_Dashboard, - fqn=dashboard_fqn, - ) - if from_entity and to_entity: - yield self._get_add_lineage_request( - to_entity=to_entity, from_entity=from_entity - ) - except Exception as exc: - logger.debug(traceback.format_exc()) - logger.error( - f"Error to yield dashboard lineage details for DB service name [{db_service_name}]: {exc}" - ) - - def yield_dashboard_chart( - self, dashboard_details: dict - ) -> Optional[Iterable[CreateChartRequest]]: - """ - Metod to fetch charts linked to dashboard - """ - for chart_id in self._get_charts_of_dashboard(dashboard_details): - chart_json = self.all_charts.get(chart_id) - if not chart_json: - logger.warning(f"chart details for id: {chart_id} not found, skipped") - continue - chart = CreateChartRequest( - name=chart_json["id"], - displayName=chart_json.get("slice_name"), - description="", - chartType=get_standard_chart_type( - chart_json.get("viz_type", ChartType.Other.value) - ), - chartUrl=chart_json.get("url"), - service=EntityReference( - id=self.context.dashboard_service.id.__root__, - type="dashboardService", - ), - ) - yield chart - - def _get_datasource_fqn( - self, datasource_id: str, db_service_name: str - ) -> Optional[str]: - if db_service_name: - try: - datasource_json = self.client.fetch_datasource(datasource_id) - database_json = self.client.fetch_database( - datasource_json["result"]["database"]["id"] - ) - dataset_fqn = fqn.build( - self.metadata, - entity_type=Table, - table_name=datasource_json["result"]["table_name"], - schema_name=datasource_json["result"]["schema"], - database_name=database_json["result"]["parameters"]["database"], - service_name=db_service_name, - ) - return dataset_fqn - except KeyError as err: - logger.debug(traceback.format_exc()) - logger.warning( - f"Failed to fetch Datasource with id [{datasource_id}]: {err}" - ) - return None + if isinstance(connection.connection, SupersetAPIConnection): + return SupersetAPISource(config, metadata_config) + return SupersetDBSource(config, metadata_config) diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/mixin.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/mixin.py new file mode 100644 index 00000000000..f0aaf722f81 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/mixin.py @@ -0,0 +1,91 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Superset mixin module +""" +import json +from typing import List + +from metadata.generated.schema.entity.services.connections.dashboard.supersetConnection import ( + SupersetConnection, +) +from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + OpenMetadataConnection, +) +from metadata.generated.schema.entity.services.dashboardService import ( + DashboardServiceType, +) +from metadata.generated.schema.metadataIngestion.workflow import ( + Source as WorkflowSource, +) +from metadata.generated.schema.type.entityReference import EntityReference +from metadata.ingestion.api.source import InvalidSourceException, SourceStatus +from metadata.ingestion.source.dashboard.dashboard_service import DashboardServiceSource + + +class SupersetSourceMixin(DashboardServiceSource): + """ + Superset DB Source Class + """ + + config: WorkflowSource + metadata_config: OpenMetadataConnection + status: SourceStatus + platform = "superset" + service_type = DashboardServiceType.Superset.value + service_connection: SupersetConnection + + def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection): + super().__init__(config, metadata_config) + self.all_charts = {} + + @classmethod + def create(cls, config_dict: dict, metadata_config: OpenMetadataConnection): + config = WorkflowSource.parse_obj(config_dict) + connection: SupersetConnection = config.serviceConnection.__root__.config + if not isinstance(connection, SupersetConnection): + raise InvalidSourceException( + f"Expected SupersetConnection, but got {connection}" + ) + return cls(config, metadata_config) + + def get_dashboard_name(self, dashboard: dict) -> str: + """ + Get Dashboard Name + """ + return dashboard["dashboard_title"] + + def get_dashboard_details(self, dashboard: dict) -> dict: + """ + Get Dashboard Details + """ + return dashboard + + def get_owner_details(self, dashboard_details: dict) -> EntityReference: + if dashboard_details.get("email"): + user = self.metadata.get_user_by_email(dashboard_details["email"]) + if user: + return EntityReference(id=user.id.__root__, type="user") + return None + + def _get_charts_of_dashboard(self, dashboard_details: dict) -> List[str]: + """ + Method to fetch chart ids linked to dashboard + """ + raw_position_data = dashboard_details.get("position_json", {}) + if raw_position_data: + position_data = json.loads(raw_position_data) + return [ + value.get("meta", {}).get("chartId") + for key, value in position_data.items() + if key.startswith("CHART-") and value.get("meta", {}).get("chartId") + ] + return [] diff --git a/ingestion/src/metadata/ingestion/source/dashboard/superset/queries.py b/ingestion/src/metadata/ingestion/source/dashboard/superset/queries.py new file mode 100644 index 00000000000..1f675993065 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/dashboard/superset/queries.py @@ -0,0 +1,43 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Queries to fetch data from superset +""" + +FETCH_ALL_CHARTS = """ +select + s.id, + s.slice_name, + t.table_name, + t.schema, + db.database_name, + db.sqlalchemy_uri +from + slices s left join "tables" t +on s.datasource_id = t.id and s.datasource_type = 'table' + left join "dbs" db +on db.id = t.database_id +""" + + +FETCH_DASHBOARDS = """ +select + d.id, + d.dashboard_title, + d.position_json, + au.email +from + dashboards d +LEFT JOIN + ab_user au +ON + d.created_by_fk = au.id +""" diff --git a/openmetadata-docs/content/connectors/dashboard/superset/sso.md b/openmetadata-docs/content/connectors/dashboard/superset/sso.md new file mode 100644 index 00000000000..c7605eded2f --- /dev/null +++ b/openmetadata-docs/content/connectors/dashboard/superset/sso.md @@ -0,0 +1,14 @@ +--- +title: Superset +slug: /connectors/dashboard/superset/sso +--- + +# Superset with SSO + +OpenMetadata utilizes [Superset REST APIs](https://superset.apache.org/docs/api/) to retrieve metadata from Superset. These APIs support two modes of authentication: `db` and `ldap`. At this time, `OAuth` authentication is not supported by these APIs. + +Although the Superset REST APIs do not support OAuth authentication, there are still two ways for a user to authenticate through the API: + +- **Using admin user credentials**: When a Superset instance is initialized, a default admin user is created with the username and password both set as "admin". This admin user can be used to authenticate to the Superset APIs via the "db" authentication mode. + +- **Using database credentials**: You can fetch metadata from superset instance by providing the `mysql` or `postgres` database connection details. \ No newline at end of file diff --git a/openmetadata-docs/content/menu.md b/openmetadata-docs/content/menu.md index 30e7b2a93c1..119160fbc7a 100644 --- a/openmetadata-docs/content/menu.md +++ b/openmetadata-docs/content/menu.md @@ -378,6 +378,8 @@ site_menu: url: /connectors/dashboard/superset/airflow - category: Connectors / Dashboard / Superset / CLI url: /connectors/dashboard/superset/cli + - category: Connectors / Dashboard / Superset / SSO + url: /connectors/dashboard/superset/sso - category: Connectors / Dashboard / Metabase url: /connectors/dashboard/metabase - category: Connectors / Dashboard / Metabase / Airflow diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java index 35a50a02660..63ed8564268 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java @@ -259,7 +259,7 @@ public abstract class EntityResourceTest CHART_REFERENCES; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/charts/ChartResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/charts/ChartResourceTest.java index 71c060b5383..133adb901e4 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/charts/ChartResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/charts/ChartResourceTest.java @@ -55,7 +55,7 @@ public class ChartResourceTest extends EntityResourceTest { @Test void post_chartWithDifferentService_200_ok(TestInfo test) throws IOException { - EntityReference[] differentServices = {SUPERSET_REFERENCE, LOOKER_REFERENCE}; + EntityReference[] differentServices = {METABASE_REFERENCE, LOOKER_REFERENCE}; // Create chart for each service and test APIs for (EntityReference service : differentServices) { @@ -99,7 +99,7 @@ public class ChartResourceTest extends EntityResourceTest { @Override public EntityReference getContainer() { - return SUPERSET_REFERENCE; + return METABASE_REFERENCE; } @Override diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/dashboards/DashboardResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/dashboards/DashboardResourceTest.java index 2edac03ad7d..8565cd69470 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/dashboards/DashboardResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/dashboards/DashboardResourceTest.java @@ -68,7 +68,7 @@ public class DashboardResourceTest extends EntityResourceTest authHeaders = ADMIN_AUTH_HEADERS; - SupersetConnection supersetConnection = - new SupersetConnection() + MetabaseConnection metabaseConnection = + new MetabaseConnection() .withHostPort(new URI("http://localhost:8080")) .withUsername("user") .withPassword("password"); createAndCheckEntity(createRequest(test, 1).withDescription(null), authHeaders); createAndCheckEntity(createRequest(test, 2).withDescription("description"), authHeaders); createAndCheckEntity( - createRequest(test, 3).withConnection(new DashboardConnection().withConfig(supersetConnection)), authHeaders); + createRequest(test, 3).withConnection(new DashboardConnection().withConfig(metabaseConnection)), authHeaders); } @Test @@ -102,7 +102,7 @@ public class DashboardServiceResourceTest extends EntityResourceTest authHeaders) { if (expectedDashboardConnection != null && actualDashboardConnection != null) { - if (dashboardServiceType == CreateDashboardService.DashboardServiceType.Superset) { - SupersetConnection expectedSupersetConnection = (SupersetConnection) expectedDashboardConnection.getConfig(); - SupersetConnection actualSupersetConnection; - if (actualDashboardConnection.getConfig() instanceof SupersetConnection) { - actualSupersetConnection = (SupersetConnection) actualDashboardConnection.getConfig(); + if (dashboardServiceType == CreateDashboardService.DashboardServiceType.Metabase) { + MetabaseConnection expectedmetabaseConnection = (MetabaseConnection) expectedDashboardConnection.getConfig(); + MetabaseConnection actualMetabaseConnection; + if (actualDashboardConnection.getConfig() instanceof MetabaseConnection) { + actualMetabaseConnection = (MetabaseConnection) actualDashboardConnection.getConfig(); } else { - actualSupersetConnection = - JsonUtils.convertValue(actualDashboardConnection.getConfig(), SupersetConnection.class); + actualMetabaseConnection = + JsonUtils.convertValue(actualDashboardConnection.getConfig(), MetabaseConnection.class); } - assertEquals(expectedSupersetConnection.getHostPort(), actualSupersetConnection.getHostPort()); - assertEquals(expectedSupersetConnection.getProvider(), actualSupersetConnection.getProvider()); + assertEquals(expectedmetabaseConnection.getHostPort(), actualMetabaseConnection.getHostPort()); if (ADMIN_AUTH_HEADERS.equals(authHeaders) || INGESTION_BOT_AUTH_HEADERS.equals(authHeaders)) { - assertEquals(expectedSupersetConnection.getUsername(), actualSupersetConnection.getUsername()); - assertTrue(actualSupersetConnection.getPassword().startsWith("secret:/openmetadata/dashboard/")); - assertTrue(actualSupersetConnection.getPassword().endsWith("/password")); + assertEquals(expectedmetabaseConnection.getUsername(), actualMetabaseConnection.getUsername()); + assertTrue(actualMetabaseConnection.getPassword().startsWith("secret:/openmetadata/dashboard/")); + assertTrue(actualMetabaseConnection.getPassword().endsWith("/password")); } else { - assertNull(actualSupersetConnection.getUsername()); - assertNull(actualSupersetConnection.getPassword()); + assertNull(actualMetabaseConnection.getUsername()); + assertNull(actualMetabaseConnection.getPassword()); } } } @@ -266,18 +265,18 @@ public class DashboardServiceResourceTest extends EntityResourceTest(); ChartResourceTest chartResourceTest = new ChartResourceTest(); for (int i = 0; i < 3; i++) { - CreateChart createChart = chartResourceTest.createRequest(test, i).withService(SUPERSET_REFERENCE); + CreateChart createChart = chartResourceTest.createRequest(test, i).withService(METABASE_REFERENCE); Chart chart = chartResourceTest.createEntity(createChart, ADMIN_AUTH_HEADERS); CHART_REFERENCES.add(chart.getEntityReference()); } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/services/ingestionpipelines/IngestionPipelineResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/services/ingestionpipelines/IngestionPipelineResourceTest.java index 4764969d145..6e041de618a 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/services/ingestionpipelines/IngestionPipelineResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/services/ingestionpipelines/IngestionPipelineResourceTest.java @@ -334,7 +334,7 @@ public class IngestionPipelineResourceTest extends EntityResourceTest { export const getTestConnectionType = (serviceCat: ServiceCategory) => { switch (serviceCat) { case ServiceCategory.MESSAGING_SERVICES: - return ConnectionType.Messaging; + return ConnectionTypeEnum.Messaging; case ServiceCategory.DASHBOARD_SERVICES: - return ConnectionType.Dashboard; + return ConnectionTypeEnum.Dashboard; case ServiceCategory.PIPELINE_SERVICES: - return ConnectionType.Pipeline; + return ConnectionTypeEnum.Pipeline; case ServiceCategory.DATABASE_SERVICES: default: - return ConnectionType.Database; + return ConnectionTypeEnum.Database; } };