From 482431bcf43d5604eced4c8dbfae85540e6db88b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 21 Mar 2023 08:19:32 +0530 Subject: [PATCH] fix(ingest/superset): support superset v2 (#7588) Co-authored-by: John Joyce --- .../airflow-plugin/.gitignore | 3 +- metadata-ingestion/.gitignore | 3 +- .../src/datahub/ingestion/source/superset.py | 28 ++++++++++++++----- .../tests/unit/test_superset_source.py | 6 ++-- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/.gitignore b/metadata-ingestion-modules/airflow-plugin/.gitignore index 6392350547..6801b785ea 100644 --- a/metadata-ingestion-modules/airflow-plugin/.gitignore +++ b/metadata-ingestion-modules/airflow-plugin/.gitignore @@ -4,6 +4,7 @@ output pvenv36/ bq_credentials.json /tmp +*.bak # Byte-compiled / optimized / DLL files __pycache__/ @@ -138,4 +139,4 @@ dmypy.json # Generated classes src/datahub/metadata/ wheels/ -junit.quick.xml \ No newline at end of file +junit.quick.xml diff --git a/metadata-ingestion/.gitignore b/metadata-ingestion/.gitignore index 301391fd9c..c79677d8fc 100644 --- a/metadata-ingestion/.gitignore +++ b/metadata-ingestion/.gitignore @@ -6,6 +6,7 @@ pvenv36/ bq_credentials.json junit.*.xml /tmp +*.bak # Byte-compiled / optimized / DLL files __pycache__/ @@ -140,4 +141,4 @@ dmypy.json # Generated classes src/datahub/metadata/ -.preflight_sentinel \ No newline at end of file +.preflight_sentinel diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 4b51ce8d12..32a3f6a65a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -1,4 +1,5 @@ import json +import logging from functools import lru_cache from typing import Dict, Iterable, Optional @@ -35,6 +36,8 @@ from datahub.metadata.schema_classes import ( ) from datahub.utilities import config_clean +logger = logging.getLogger(__name__) + PAGE_SIZE = 25 @@ -58,7 +61,9 @@ chart_type_from_viz_type = { class SupersetConfig(ConfigModel): # See the Superset /security/login endpoint for details # https://superset.apache.org/docs/rest-api - connect_uri: str = Field(default="localhost:8088", description="Superset host URL.") + connect_uri: str = Field( + default="http://localhost:8088", description="Superset host URL." + ) display_uri: Optional[str] = Field( default=None, description="optional URL to use in links (if `connect_uri` is only for ingestion)", @@ -136,8 +141,7 @@ class SupersetSource(Source): login_response = requests.post( f"{self.config.connect_uri}/api/v1/security/login", - None, - { + json={ "username": self.config.username, "password": self.config.password, "refresh": True, @@ -146,6 +150,7 @@ class SupersetSource(Source): ) self.access_token = login_response.json()["access_token"] + logger.debug("Got access token from superset") self.session = requests.Session() self.session.headers.update( @@ -157,7 +162,7 @@ class SupersetSource(Source): ) # Test the connection - test_response = self.session.get(f"{self.config.connect_uri}/api/v1/database") + test_response = self.session.get(f"{self.config.connect_uri}/api/v1/dashboard/") if test_response.status_code == 200: pass # TODO(Gabe): how should we message about this error? @@ -251,15 +256,20 @@ class SupersetSource(Source): while current_dashboard_page * PAGE_SIZE <= total_dashboards: dashboard_response = self.session.get( - f"{self.config.connect_uri}/api/v1/dashboard", + f"{self.config.connect_uri}/api/v1/dashboard/", params=f"q=(page:{current_dashboard_page},page_size:{PAGE_SIZE})", ) + if dashboard_response.status_code != 200: + logger.warning( + f"Failed to get dashboard data: {dashboard_response.text}" + ) + dashboard_response.raise_for_status() + payload = dashboard_response.json() total_dashboards = payload.get("count") or 0 current_dashboard_page += 1 - payload = dashboard_response.json() for dashboard_data in payload["result"]: dashboard_snapshot = self.construct_dashboard_from_api_data( dashboard_data @@ -352,9 +362,13 @@ class SupersetSource(Source): while current_chart_page * PAGE_SIZE <= total_charts: chart_response = self.session.get( - f"{self.config.connect_uri}/api/v1/chart", + f"{self.config.connect_uri}/api/v1/chart/", params=f"q=(page:{current_chart_page},page_size:{PAGE_SIZE})", ) + if chart_response.status_code != 200: + logger.warning(f"Failed to get chart data: {chart_response.text}") + chart_response.raise_for_status() + current_chart_page += 1 payload = chart_response.json() diff --git a/metadata-ingestion/tests/unit/test_superset_source.py b/metadata-ingestion/tests/unit/test_superset_source.py index 85f8904ffc..912bfa3511 100644 --- a/metadata-ingestion/tests/unit/test_superset_source.py +++ b/metadata-ingestion/tests/unit/test_superset_source.py @@ -4,8 +4,8 @@ from datahub.ingestion.source.superset import SupersetConfig def test_default_values(): config = SupersetConfig.parse_obj({}) - assert config.connect_uri == "localhost:8088" - assert config.display_uri == "localhost:8088" + assert config.connect_uri == "http://localhost:8088" + assert config.display_uri == "http://localhost:8088" assert config.provider == "db" assert config.env == "PROD" assert config.username is None @@ -17,5 +17,5 @@ def test_set_display_uri(): config = SupersetConfig.parse_obj({"display_uri": display_uri}) - assert config.connect_uri == "localhost:8088" + assert config.connect_uri == "http://localhost:8088" assert config.display_uri == display_uri