mirror of https://github.com/datahub-project/datahub.git

commit dd1b81b727 (parent 24dc67f484)

feat(ingest): adding redash source (#2918)

Co-authored-by: tibrahim <taufiq.ibrahim@bizzy.co.id>
@@ -1,6 +1,7 @@
import lookerLogo from '../../images/lookerlogo.png';
import supersetLogo from '../../images/supersetlogo.png';
import airflowLogo from '../../images/airflowlogo.png';
+import redashLogo from '../../images/redashlogo.png';

/**
 * TODO: This is a temporary solution, until the backend can push logos for all data platform types.
@@ -15,5 +16,8 @@ export function getLogoFromPlatform(platform: string) {
    if (platform.toLowerCase() === 'airflow') {
        return airflowLogo;
    }
+   if (platform.toLowerCase() === 'redash') {
+       return redashLogo;
+   }
    return undefined;
}
BIN  datahub-web-react/src/images/redashlogo.png  Normal file (3.8 KiB; binary file not shown)
@@ -53,6 +53,7 @@ Sources:
| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source |
| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source |
| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source |
+| [redash](./source_docs/redash.md) | `pip install 'acryl-datahub[redash]'` | Redash source |
| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source |
| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source |
| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
@@ -535,5 +535,25 @@
            }
        },
        "proposedDelta": null
    },
+   {
+       "auditHeader": null,
+       "proposedSnapshot": {
+           "com.linkedin.pegasus2avro.metadata.snapshot.DataPlatformSnapshot": {
+               "urn": "urn:li:dataPlatform:redash",
+               "aspects": [
+                   {
+                       "com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo": {
+                           "datasetNameDelimiter": ".",
+                           "name": "redash",
+                           "displayName": "Redash",
+                           "type": "OTHERS",
+                           "logoUrl": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/redashlogo.png"
+                       }
+                   }
+               ]
+           }
+       },
+       "proposedDelta": null
+   }
]
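The hunk above registers `redash` as a data platform in the bootstrap MCE file. As an aside (not part of this commit), the same snapshot can be built programmatically; a minimal sketch, assuming the generated `schema_classes` module exposes `DataPlatformInfoClass`/`DataPlatformSnapshotClass` in the same way as the chart and dashboard classes used elsewhere in this commit:

```python
# Hypothetical sketch: the bootstrap entry above, built in Python.
# Class names are assumed from DataHub's generated schema_classes module.
from datahub.metadata.schema_classes import (
    DataPlatformInfoClass,
    DataPlatformSnapshotClass,
    MetadataChangeEventClass,
    PlatformTypeClass,
)

platform_mce = MetadataChangeEventClass(
    proposedSnapshot=DataPlatformSnapshotClass(
        urn="urn:li:dataPlatform:redash",
        aspects=[
            DataPlatformInfoClass(
                name="redash",
                displayName="Redash",
                type=PlatformTypeClass.OTHERS,
                datasetNameDelimiter=".",
                logoUrl="https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/redashlogo.png",
            )
        ],
    )
)
```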
25  metadata-ingestion/examples/recipes/redash_to_file.yml  Normal file
@@ -0,0 +1,25 @@
---
source:
  type: "redash"
  config:
    connect_uri: http://localhost:5000/
    api_key: REDASH_API_KEY

    # Optionals
    # api_page_limit: 1  # default: None, no limit on ingested dashboards and charts API pagination
    # skip_draft: true  # default: true, only ingest published dashboards and charts
    # dashboard_patterns:
    #   deny:
    #     - ^denied dashboard.*
    #   allow:
    #     - .*allowed dashboard.*
    # chart_patterns:
    #   deny:
    #     - ^denied chart.*
    #   allow:
    #     - .*allowed chart.*

sink:
  type: "file"
  config:
    filename: "${HOME}/redash_output_mces.json"
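The recipe above can be run with the DataHub CLI (`datahub ingest -c redash_to_file.yml`) or programmatically. A minimal programmatic sketch, assuming a Redash instance is reachable at `connect_uri` and a real key replaces the `REDASH_API_KEY` placeholder:

```python
# Minimal sketch: run the redash_to_file recipe programmatically.
# Assumes Redash at localhost:5000; api_key is a placeholder.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "run_id": "redash-to-file",
        "source": {
            "type": "redash",
            "config": {
                "connect_uri": "http://localhost:5000/",
                "api_key": "REDASH_API_KEY",  # placeholder
            },
        },
        "sink": {
            "type": "file",
            "config": {"filename": "./redash_output_mces.json"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```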
@@ -100,6 +100,7 @@ plugins: Dict[str, Set[str]] = {
    "okta": {"okta~=1.7.0"},
    "oracle": sql_common | {"cx_Oracle"},
    "postgres": sql_common | {"psycopg2-binary", "GeoAlchemy2"},
+   "redash": {"redash-toolbelt"},
    "redshift": sql_common | {"sqlalchemy-redshift", "psycopg2-binary", "GeoAlchemy2"},
    "sagemaker": aws_common,
    "snowflake": sql_common | {"snowflake-sqlalchemy<=1.2.4"},
@@ -166,6 +167,7 @@ base_dev_requirements = {
        "sagemaker",
        "datahub-kafka",
        "datahub-rest",
+       "redash",
        # airflow is added below
    ]
    for dependency in plugins[plugin]
@@ -203,6 +205,7 @@ full_test_dev_requirements = {
        "mysql",
        "snowflake",
        "sql-profiles",
+       "redash",
    ]
    for dependency in plugins[plugin]
),
@@ -233,6 +236,7 @@ entry_points = {
    "okta = datahub.ingestion.source.identity.okta:OktaSource",
    "oracle = datahub.ingestion.source.sql.oracle:OracleSource",
    "postgres = datahub.ingestion.source.sql.postgres:PostgresSource",
+   "redash = datahub.ingestion.source.redash:RedashSource",
    "redshift = datahub.ingestion.source.sql.redshift:RedshiftSource",
    "snowflake = datahub.ingestion.source.sql.snowflake:SnowflakeSource",
    "snowflake-usage = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource",
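The `entry_points` hunk is what makes `type: "redash"` in a recipe resolve to `RedashSource`. A sketch of that lookup, assuming the entry-point group is named `datahub.ingestion.source.plugins` (the group name sits outside this hunk) and Python 3.10+ for the `entry_points(group=...)` API:

```python
# Sketch: how a recipe's source type resolves to a class via entry points.
# Group name is an assumption; the keys "redash = ..." above are the ep names.
from importlib.metadata import entry_points

for ep in entry_points(group="datahub.ingestion.source.plugins"):
    if ep.name == "redash":
        source_cls = ep.load()  # -> datahub.ingestion.source.redash.RedashSource
        print(source_cls)
```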
66  metadata-ingestion/source_docs/redash.md  Normal file
@@ -0,0 +1,66 @@
# Redash

For context on getting started with ingestion, check out our [metadata ingestion guide](../README.md).

## Setup

To install this plugin, run `pip install 'acryl-datahub[redash]'`.

## Capabilities

This plugin extracts the following:

- Redash dashboards and queries/visualizations

## Quickstart recipe

Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options.

For general pointers on writing and running a recipe, see our [main recipe guide](../README.md#recipes).

```yml
source:
  type: "redash"
  config:
    connect_uri: http://localhost:5000/
    api_key: REDASH_API_KEY

    # Optionals
    # api_page_limit: 1  # default: None, no limit on ingested dashboards and charts API pagination
    # skip_draft: true  # default: true, only ingest published dashboards and charts
    # dashboard_patterns:
    #   deny:
    #     - ^denied dashboard.*
    #   allow:
    #     - .*allowed dashboard.*
    # chart_patterns:
    #   deny:
    #     - ^denied chart.*
    #   allow:
    #     - .*allowed chart.*
```

## Config details

Note that a `.` is used to denote nested fields in the YAML recipe.

| Field                      | Required | Default                  | Description                                                       |
| -------------------------- | -------- | ------------------------ | ----------------------------------------------------------------- |
| `connect_uri`              | ✅       | `http://localhost:5000/` | Redash base URL.                                                  |
| `api_key`                  | ✅       |                          | Redash user API key.                                              |
| `api_page_limit`           |          | `None`                   | Limit on ingested dashboards and charts API pagination.           |
| `skip_draft`               |          | `true`                   | Only ingest published dashboards and charts.                      |
| `dashboard_patterns.allow` |          |                          | List of regex patterns for dashboards to include in ingestion.    |
| `dashboard_patterns.deny`  |          |                          | List of regex patterns for dashboards to exclude from ingestion.  |
| `chart_patterns.allow`     |          |                          | List of regex patterns for charts to include in ingestion.        |
| `chart_patterns.deny`      |          |                          | List of regex patterns for charts to exclude from ingestion.      |
| `env`                      |          | `"PROD"`                 | Environment to use in namespace when constructing URNs.           |

## Compatibility

Coming soon!

## Questions

If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://slack.datahubproject.io/)!
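Because `dashboard_patterns` and `chart_patterns` are `AllowDenyPattern` fields on a pydantic `ConfigModel`, a recipe-style dict parses straight into `RedashConfig`, just as `RedashSource.create` does. A small sketch with example pattern values:

```python
# Sketch: parsing a recipe-style config dict, including nested
# allow/deny patterns, into a RedashConfig (example values only).
from datahub.ingestion.source.redash import RedashConfig

config = RedashConfig.parse_obj(
    {
        "connect_uri": "http://localhost:5000/",
        "api_key": "REDASH_API_KEY",  # placeholder
        "skip_draft": True,
        "dashboard_patterns": {
            "allow": [".*allowed dashboard.*"],
            "deny": ["^denied dashboard.*"],
        },
    }
)
assert config.dashboard_patterns.allowed("my allowed dashboard")
assert not config.dashboard_patterns.allowed("denied dashboard v2")
```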
468  metadata-ingestion/src/datahub/ingestion/source/redash.py  Normal file
@@ -0,0 +1,468 @@
import logging
import math
import sys
from dataclasses import dataclass, field
from typing import Dict, Iterable, List, Optional

import dateutil.parser as dp
from redash_toolbelt import Redash

from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.emitter.mce_builder import DEFAULT_ENV
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.com.linkedin.pegasus2avro.common import (
    AuditStamp,
    ChangeAuditStamps,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
    ChartSnapshot,
    DashboardSnapshot,
)
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import (
    ChartInfoClass,
    ChartTypeClass,
    DashboardInfoClass,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

PAGE_SIZE = 25

DEFAULT_DATA_SOURCE_PLATFORM = "external"
DEFAULT_DATA_BASE_NAME = "default"

# TODO: update the DataHub-registered platform names. Currently we use "external" for unmapped platforms.
# The supported data source list comes from REDASH_BASE_URL/api/data_sources/types or https://redash.io/integrations/
# Not all data sources are supported on DataHub.
# We also get the database name from the connection options, see https://github.com/getredash/redash/tree/master/redash/query_runner
REDASH_DATA_SOURCE_TO_DATAHUB_MAP = {
    "athena": {"platform": "athena", "db_name_key": "schema"},
    "azure_kusto": {"platform": "kusto", "db_name_key": "database"},
    "bigquery": {"platform": "bigquery", "db_name_key": "projectId"},
    "Cassandra": {"platform": "external", "db_name_key": "keyspace"},
    "clickhouse": {"platform": "external", "db_name_key": "dbname"},
    "cockroach": {"platform": "external", "db_name_key": "db"},
    "couchbase": {"platform": "couchbase"},
    "db2": {"platform": "external", "db_name_key": "dbname"},
    "drill": {"platform": "external", "db_name_key": "dbname"},
    "druid": {"platform": "druid"},
    "hive_http": {"platform": "hive"},
    "hive": {"platform": "hive"},
    "impala": {"platform": "external", "db_name_key": "database"},
    "mapd": {"platform": "external", "db_name_key": "database"},
    "mongodb": {"platform": "mongodb", "db_name_key": "dbName"},
    "mssql": {"platform": "mssql", "db_name_key": "db"},
    "mysql": {"platform": "mysql", "db_name_key": "db"},
    "pg": {"platform": "postgres", "db_name_key": "dbname"},
    "phoenix": {"platform": "external", "db_name_key": "db"},
    "presto": {"platform": "presto", "db_name_key": "schema"},
    "qubole": {"platform": "external", "db_name_key": "cluster"},
    "rds_mysql": {"platform": "mysql", "db_name_key": "db"},
    "redshift": {"platform": "redshift", "db_name_key": "dbname"},
    "scylla": {"platform": "external", "db_name_key": "keyspace"},
    "snowflake": {"platform": "snowflake", "db_name_key": "database"},
    "sqlite": {"platform": "sqlite", "db_name_key": "db"},
    "treasuredata": {"platform": "external", "db_name_key": "db"},
    "vertica": {"platform": "vertica", "db_name_key": "database"},
    "results": {"platform": "external", "db_name_key": "name"},
}


# We assume the default chart type is TABLE
DEFAULT_VISUALIZATION_TYPE = ChartTypeClass.TABLE

# https://github.com/getredash/redash/blob/master/viz-lib/src/visualizations/chart/Editor/ChartTypeSelect.tsx
# TODO: add more mappings to ChartTypeClass
PLOTLY_CHART_MAP = {
    # TODO: add more Plotly visualization mappings here
    # TODO: need to add more ChartTypeClass members in datahub schema_classes.py
    "line": ChartTypeClass.LINE,
    "column": ChartTypeClass.BAR,
    "area": ChartTypeClass.AREA,
    "pie": ChartTypeClass.PIE,
    "scatter": ChartTypeClass.SCATTER,
    "bubble": None,
    "heatmap": None,
    "box": ChartTypeClass.BOX_PLOT,
}

VISUALIZATION_TYPE_MAP = {
    # TODO: add more Redash visualization mappings here
    # https://redash.io/help/user-guide/visualizations/visualization-types
    # https://github.com/getredash/redash/blob/master/viz-lib/src/visualizations/registeredVisualizations.ts
    # TODO: need to add more ChartTypeClass members in datahub schema_classes.py
    "BOXPLOT": ChartTypeClass.BOX_PLOT,
    "CHOROPLETH": None,
    "COUNTER": ChartTypeClass.TABLE,
    "DETAILS": ChartTypeClass.TABLE,
    "FUNNEL": None,
    "MAP": None,
    "PIVOT": ChartTypeClass.TABLE,
    "SANKEY": None,
    "SUNBURST_SEQUENCE": None,
    "TABLE": ChartTypeClass.TABLE,
    "WORD_CLOUD": None,
}


class RedashConfig(ConfigModel):
    # See the Redash API for details:
    # https://redash.io/help/user-guide/integrations-and-api/api
    connect_uri: str = "http://localhost:5000"
    api_key: str = "REDASH_API_KEY"
    env: str = DEFAULT_ENV

    # Optionals
    dashboard_patterns: AllowDenyPattern = AllowDenyPattern.allow_all()
    chart_patterns: AllowDenyPattern = AllowDenyPattern.allow_all()
    skip_draft: bool = True
    api_page_limit: int = sys.maxsize
    # parse_table_names_from_sql: bool = False  # TODO: _get_upstream_lineage from SQL


@dataclass
class RedashSourceReport(SourceReport):
    items_scanned: int = 0
    filtered: List[str] = field(default_factory=list)

    def report_item_scanned(self) -> None:
        self.items_scanned += 1

    def report_dropped(self, item: str) -> None:
        self.filtered.append(item)


class RedashSource(Source):
    config: RedashConfig
    report: RedashSourceReport
    platform = "redash"

    def __init__(self, ctx: PipelineContext, config: RedashConfig):
        super().__init__(ctx)
        self.config = config
        self.report = RedashSourceReport()

        # Handle trailing slash removal
        self.config.connect_uri = self.config.connect_uri.strip("/")

        self.client = Redash(self.config.connect_uri, self.config.api_key)
        self.client.session.headers.update(
            {
                "Content-Type": "application/json",
                "Accept": "application/json",
            }
        )

        self.api_page_limit = self.config.api_page_limit or math.inf

    def test_connection(self) -> None:
        test_response = self.client._get(f"{self.config.connect_uri}/api")
        if test_response.status_code == 200:
            logger.info("Redash API connected successfully")
        else:
            raise ValueError(f"Failed to connect to {self.config.connect_uri}/api")

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
        config = RedashConfig.parse_obj(config_dict)
        return cls(ctx, config)

    def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
        url = f"/api/data_sources/{data_source_id}"
        resp = self.client._get(url).json()
        logger.debug(resp)
        return resp

    def _get_datasource_urn_from_data_source(self, data_source: Dict) -> Optional[str]:
        data_source_type = data_source.get("type")
        data_source_name = data_source.get("name")
        data_source_options = data_source.get("options", {})

        if data_source_type:
            map = REDASH_DATA_SOURCE_TO_DATAHUB_MAP.get(
                data_source_type, {"platform": DEFAULT_DATA_SOURCE_PLATFORM}
            )
            platform = map.get("platform")
            platform_urn = f"urn:li:dataPlatform:{platform}"

            db_name_key = map.get("db_name_key", "db")
            db_name = data_source_options.get(db_name_key, DEFAULT_DATA_BASE_NAME)

            # Redash Query Results
            if data_source_type == "results":
                dataset_urn = f"urn:li:dataset:({platform_urn},{data_source_name},{self.config.env})"
                return dataset_urn

            # Other Redash-supported data sources, as in REDASH_DATA_SOURCE_TO_DATAHUB_MAP
            if db_name:
                dataset_urn = (
                    f"urn:li:dataset:({platform_urn},{db_name},{self.config.env})"
                )
                return dataset_urn
        return None

    def _get_dashboard_description_from_widgets(
        self, dashboard_widgets: List[Dict]
    ) -> str:
        description = ""

        for widget in dashboard_widgets:
            visualization = widget.get("visualization")
            if visualization is None:
                options = widget.get("options")
                text = widget.get("text")
                isHidden = widget.get("isHidden")

                # TRICKY: If the top-left-most widget is a textbox, then we assume it is the description
                if options and text and isHidden is None:
                    position = options.get("position")
                    if position:
                        col = position.get("col")
                        row = position.get("row")
                        if col == 0 and row == 0:
                            description = text
            else:
                continue

        return description

    def _get_dashboard_chart_urns_from_widgets(
        self, dashboard_widgets: List[Dict]
    ) -> List[str]:
        chart_urns = []
        for widget in dashboard_widgets:
            # In Redash, a chart is called a visualization
            visualization = widget.get("visualization")
            if visualization:
                visualization_id = visualization.get("id", None)
                if visualization_id is not None:
                    chart_urns.append(
                        f"urn:li:chart:({self.platform},{visualization_id})"
                    )

        return chart_urns

    def _get_dashboard_snapshot(self, dashboard_data):
        dashboard_id = dashboard_data["id"]
        dashboard_urn = f"urn:li:dashboard:({self.platform},{dashboard_id})"
        dashboard_snapshot = DashboardSnapshot(
            urn=dashboard_urn,
            aspects=[],
        )

        modified_actor = f"urn:li:corpuser:{dashboard_data.get('changed_by', {}).get('username', 'unknown')}"
        modified_ts = int(
            dp.parse(dashboard_data.get("updated_at", "now")).timestamp() * 1000
        )
        title = dashboard_data.get("name", "")

        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

        dashboard_url = (
            f"{self.config.connect_uri}/dashboard/{dashboard_data.get('slug', '')}"
        )

        widgets = dashboard_data.get("widgets", [])
        description = self._get_dashboard_description_from_widgets(widgets)
        chart_urns = self._get_dashboard_chart_urns_from_widgets(widgets)

        dashboard_info = DashboardInfoClass(
            description=description,
            title=title,
            charts=chart_urns,
            lastModified=last_modified,
            dashboardUrl=dashboard_url,
            customProperties={},
        )
        dashboard_snapshot.aspects.append(dashboard_info)

        return dashboard_snapshot

    def _emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
        current_dashboards_page = 0
        skip_draft = self.config.skip_draft

        # we will set the total number of dashboards to the actual number after we get the response
        total_dashboards = PAGE_SIZE

        while (
            current_dashboards_page * PAGE_SIZE <= total_dashboards
            and current_dashboards_page < self.api_page_limit
        ):
            dashboards_response = self.client.dashboards(
                page=current_dashboards_page + 1, page_size=PAGE_SIZE
            )
            total_dashboards = dashboards_response.get("count") or 0
            current_dashboards_page += 1

            logger.info(f"/api/dashboards on page {current_dashboards_page}")

            for dashboard_response in dashboards_response["results"]:

                dashboard_name = dashboard_response["name"]

                self.report.report_item_scanned()

                if (not self.config.dashboard_patterns.allowed(dashboard_name)) or (
                    skip_draft and dashboard_response["is_draft"]
                ):
                    self.report.report_dropped(dashboard_name)
                    continue

                # Continue producing MCEs
                dashboard_slug = dashboard_response["slug"]
                dashboard_data = self.client.dashboard(dashboard_slug)
                logger.debug(dashboard_data)
                dashboard_snapshot = self._get_dashboard_snapshot(dashboard_data)
                mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
                wu = MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce)
                self.report.report_workunit(wu)

                yield wu

    def _get_chart_type_from_viz_data(self, viz_data: Dict) -> str:
        """
        https://redash.io/help/user-guide/visualizations/visualization-types
        Redash has multiple visualization types, and the CHART type is actually Plotly,
        so we need to check which series type is being used in the options returned by the API.
        """
        viz_type = viz_data.get("type", "")
        viz_options = viz_data.get("options", {})
        globalSeriesType = viz_options.get("globalSeriesType", "")
        report_key = f"redash-chart-{viz_data['id']}"

        # handle Plotly chart types
        if viz_type == "CHART":
            chart_type = PLOTLY_CHART_MAP.get(globalSeriesType)
            if chart_type is None:
                chart_type = DEFAULT_VISUALIZATION_TYPE
                message = f"ChartTypeClass for Redash Visualization Type={viz_type} with options.globalSeriesType={globalSeriesType} is missing. Setting to {DEFAULT_VISUALIZATION_TYPE}"
                self.report.report_warning(key=report_key, reason=message)
                logger.warning(message)
        else:
            chart_type = VISUALIZATION_TYPE_MAP.get(viz_type)
            if chart_type is None:
                chart_type = DEFAULT_VISUALIZATION_TYPE
                message = f"ChartTypeClass for Redash Visualization Type={viz_type} is missing. Setting to {DEFAULT_VISUALIZATION_TYPE}"
                self.report.report_warning(key=report_key, reason=message)
                logger.warning(message)

        return chart_type

    def _get_chart_snapshot(self, query_data: Dict, viz_data: Dict) -> ChartSnapshot:
        viz_id = viz_data["id"]
        chart_urn = f"urn:li:chart:({self.platform},{viz_id})"
        chart_snapshot = ChartSnapshot(
            urn=chart_urn,
            aspects=[],
        )

        modified_actor = f"urn:li:corpuser:{viz_data.get('changed_by', {}).get('username', 'unknown')}"
        modified_ts = int(
            dp.parse(viz_data.get("updated_at", "now")).timestamp() * 1000
        )
        title = f"{query_data.get('name')} {viz_data.get('name', '')}"

        last_modified = ChangeAuditStamps(
            created=AuditStamp(time=modified_ts, actor=modified_actor),
            lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
        )

        # Getting chart type
        chart_type = self._get_chart_type_from_viz_data(viz_data)
        chart_url = f"{self.config.connect_uri}/queries/{query_data.get('id')}#{viz_id}"
        description = (
            viz_data.get("description", "") if viz_data.get("description", "") else ""
        )
        data_source_id = query_data.get("data_source_id")
        data_source = self._get_chart_data_source(data_source_id)
        data_source_type = data_source.get("type")

        # TODO: Getting table lineage from SQL parsing
        # Currently we only get database-level lineage from `data_source_id`, which returns the database name or BigQuery's projectId
        # query = query_data.get("query", "")
        datasource_urn = self._get_datasource_urn_from_data_source(data_source)

        if not datasource_urn:
            self.report.report_warning(
                key=f"redash-chart-{viz_id}",
                reason=f"data_source_type={data_source_type} not yet implemented. Setting inputs to None",
            )

        chart_info = ChartInfoClass(
            type=chart_type,
            description=description,
            title=title,
            lastModified=last_modified,
            chartUrl=chart_url,
            inputs=[
                datasource_urn,
            ]
            if datasource_urn
            else None,
        )
        chart_snapshot.aspects.append(chart_info)

        return chart_snapshot

    def _emit_chart_mces(self) -> Iterable[MetadataWorkUnit]:
        current_queries_page = 0
        skip_draft = self.config.skip_draft

        # we will set the total number of charts to the actual number after we get the response
        total_queries = PAGE_SIZE

        while (
            current_queries_page * PAGE_SIZE <= total_queries
            and current_queries_page < self.api_page_limit
        ):
            queries_response = self.client.queries(
                page=current_queries_page + 1, page_size=PAGE_SIZE
            )
            current_queries_page += 1
            logger.info(f"/api/queries on page {current_queries_page}")

            total_queries = queries_response["count"]
            for query_response in queries_response["results"]:

                chart_name = query_response["name"]

                self.report.report_item_scanned()

                if (not self.config.chart_patterns.allowed(chart_name)) or (
                    skip_draft and query_response["is_draft"]
                ):
                    self.report.report_dropped(chart_name)
                    continue

                query_id = query_response["id"]
                query_data = self.client._get(f"/api/queries/{query_id}").json()
                logger.debug(query_data)

                # In Redash, a chart is called a visualization
                for visualization in query_data.get("visualizations", []):
                    chart_snapshot = self._get_chart_snapshot(query_data, visualization)
                    mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
                    wu = MetadataWorkUnit(id=chart_snapshot.urn, mce=mce)
                    self.report.report_workunit(wu)

                    yield wu

    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        self.test_connection()
        yield from self._emit_dashboard_mces()
        yield from self._emit_chart_mces()

    def get_report(self) -> SourceReport:
        return self.report

    def close(self):
        pass
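For a quick smoke test of the new source outside a pipeline (mirroring the `redash_source()` helper in the unit tests below), it can be driven directly; a sketch assuming a local Redash instance and a placeholder API key:

```python
# Sketch: drive RedashSource directly and inspect the emitted workunits.
# Assumes Redash at localhost:5000; api_key is a placeholder.
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.redash import RedashConfig, RedashSource

source = RedashSource(
    ctx=PipelineContext(run_id="redash-smoke-test"),
    config=RedashConfig(connect_uri="http://localhost:5000", api_key="REDASH_API_KEY"),
)
for wu in source.get_workunits():  # yields dashboard MCEs, then chart MCEs
    print(wu.id)
print(source.get_report())
source.close()
```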
565  metadata-ingestion/tests/unit/test_redash_source.py  Normal file
@@ -0,0 +1,565 @@
from typing import Any, Dict
from unittest.mock import patch

from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.redash import RedashConfig, RedashSource
from datahub.metadata.com.linkedin.pegasus2avro.common import (
    AuditStamp,
    ChangeAuditStamps,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
    ChartSnapshot,
    DashboardSnapshot,
)
from datahub.metadata.schema_classes import ChartInfoClass, DashboardInfoClass

mock_dashboard_response = {
    "tags": [],
    "is_archived": False,
    "updated_at": "2021-08-13T19:14:15.288Z",
    "is_favorite": False,
    "user": {
        "auth_type": "password",
        "is_disabled": False,
        "updated_at": "2021-08-13T19:31:44.116Z",
        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
        "is_invitation_pending": False,
        "groups": [1, 2],
        "id": 1,
        "name": "redash",
        "created_at": "2021-08-13T13:39:56.216Z",
        "disabled_at": None,
        "is_email_verified": True,
        "active_at": "2021-08-13T19:31:24Z",
        "email": "redash@example.com",
    },
    "layout": [],
    "is_draft": False,
    "id": 3,
    "can_edit": True,
    "user_id": 1,
    "name": "My Dashboard",
    "created_at": "2021-08-13T19:13:07.408Z",
    "slug": "my-dashboard",
    "version": 2,
    "widgets": [
        {
            "visualization": {
                "description": "",
                "created_at": "2021-08-13T19:09:55.779Z",
                "updated_at": "2021-08-13T19:13:42.544Z",
                "id": 10,
                "query": {
                    "user": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "created_at": "2021-08-13T18:57:33.074Z",
                    "latest_query_data_id": 20,
                    "schedule": None,
                    "description": None,
                    "tags": [],
                    "updated_at": "2021-08-13T19:10:04.396Z",
                    "last_modified_by": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "options": {"parameters": []},
                    "is_safe": True,
                    "version": 1,
                    "query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
                    "is_archived": False,
                    "query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
                    "api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
                    "is_draft": False,
                    "id": 4,
                    "data_source_id": 2,
                    "name": "My Query",
                },
                "type": "CHART",
                "options": {
                    "showDataLabels": True,
                    "direction": {"type": "counterclockwise"},
                    "missingValuesAsZero": True,
                    "error_y": {"visible": True, "type": "data"},
                    "numberFormat": "0,0[.]00000",
                    "yAxis": [{"type": "linear"}, {"type": "linear", "opposite": True}],
                    "series": {
                        "stacking": None,
                        "error_y": {"visible": True, "type": "data"},
                    },
                    "globalSeriesType": "pie",
                    "percentFormat": "0[.]00%",
                    "sortX": True,
                    "seriesOptions": {
                        "value": {"zIndex": 0, "index": 0, "type": "pie", "yAxis": 0}
                    },
                    "valuesOptions": {"Yes": {}, "No": {}},
                    "xAxis": {"labels": {"enabled": True}, "type": "-"},
                    "dateTimeFormat": "DD/MM/YY HH:mm",
                    "columnMapping": {"stage1": "x", "value": "y"},
                    "textFormat": "",
                    "customCode": "// Available variables are x, ys, element, and Plotly\n// Type console.log(x, ys); for more info about x and ys\n// To plot your graph call Plotly.plot(element, ...)\n// Plotly examples and docs: https://plot.ly/javascript/",
                    "legend": {"enabled": True},
                },
                "name": "Chart",
            },
            "text": "",
            "created_at": "2021-08-13T19:13:42.544Z",
            "updated_at": "2021-08-13T19:14:11.171Z",
            "options": {
                "parameterMappings": {},
                "isHidden": False,
                "position": {
                    "autoHeight": False,
                    "sizeX": 3,
                    "sizeY": 14,
                    "maxSizeY": 1000,
                    "maxSizeX": 6,
                    "minSizeY": 5,
                    "minSizeX": 1,
                    "col": 3,
                    "row": 3,
                },
            },
            "dashboard_id": 3,
            "width": 1,
            "id": 11,
        },
        {
            "text": "My description",
            "created_at": "2021-08-13T19:13:17.453Z",
            "updated_at": "2021-08-13T19:13:22.165Z",
            "options": {
                "position": {
                    "autoHeight": False,
                    "sizeX": 6,
                    "sizeY": 3,
                    "maxSizeY": 1000,
                    "maxSizeX": 6,
                    "minSizeY": 1,
                    "minSizeX": 1,
                    "col": 0,
                    "row": 0,
                },
                "isHidden": False,
                "parameterMappings": {},
            },
            "dashboard_id": 3,
            "width": 1,
            "id": 9,
        },
        {
            "visualization": {
                "description": "",
                "created_at": "2021-08-13T19:09:11.445Z",
                "updated_at": "2021-08-13T19:13:29.571Z",
                "id": 9,
                "query": {
                    "user": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "created_at": "2021-08-13T18:57:33.074Z",
                    "latest_query_data_id": 20,
                    "schedule": None,
                    "description": None,
                    "tags": [],
                    "updated_at": "2021-08-13T19:10:04.396Z",
                    "last_modified_by": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "options": {"parameters": []},
                    "is_safe": True,
                    "version": 1,
                    "query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
                    "is_archived": False,
                    "query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
                    "api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
                    "is_draft": False,
                    "id": 4,
                    "data_source_id": 2,
                    "name": "My Query",
                },
                "type": "SANKEY",
                "options": {},
                "name": "Sankey",
            },
            "text": "",
            "created_at": "2021-08-13T19:13:29.571Z",
            "updated_at": "2021-08-13T19:13:29.665Z",
            "options": {
                "parameterMappings": {},
                "isHidden": False,
                "position": {
                    "autoHeight": False,
                    "sizeX": 3,
                    "sizeY": 7,
                    "maxSizeY": 1000,
                    "maxSizeX": 6,
                    "minSizeY": 1,
                    "minSizeX": 1,
                    "col": 0,
                    "row": 3,
                },
            },
            "dashboard_id": 3,
            "width": 1,
            "id": 10,
        },
        {
            "visualization": {
                "description": "",
                "created_at": "2021-08-13T18:57:33.074Z",
                "updated_at": "2021-08-13T19:13:51.175Z",
                "id": 8,
                "query": {
                    "user": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "created_at": "2021-08-13T18:57:33.074Z",
                    "latest_query_data_id": 20,
                    "schedule": None,
                    "description": None,
                    "tags": [],
                    "updated_at": "2021-08-13T19:10:04.396Z",
                    "last_modified_by": {
                        "auth_type": "password",
                        "is_disabled": False,
                        "updated_at": "2021-08-13T19:31:44.116Z",
                        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
                        "is_invitation_pending": False,
                        "groups": [1, 2],
                        "id": 1,
                        "name": "redash",
                        "created_at": "2021-08-13T13:39:56.216Z",
                        "disabled_at": None,
                        "is_email_verified": True,
                        "active_at": "2021-08-13T19:31:24Z",
                        "email": "redash@example.com",
                    },
                    "options": {"parameters": []},
                    "is_safe": True,
                    "version": 1,
                    "query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
                    "is_archived": False,
                    "query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
                    "api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
                    "is_draft": False,
                    "id": 4,
                    "data_source_id": 2,
                    "name": "My Query",
                },
                "type": "TABLE",
                "options": {},
                "name": "Table",
            },
            "text": "",
            "created_at": "2021-08-13T19:13:51.175Z",
            "updated_at": "2021-08-13T19:14:58.898Z",
            "options": {
                "parameterMappings": {},
                "isHidden": False,
                "position": {
                    "autoHeight": False,
                    "sizeX": 3,
                    "sizeY": 7,
                    "maxSizeY": 1000,
                    "maxSizeX": 6,
                    "minSizeY": 1,
                    "minSizeX": 2,
                    "col": 0,
                    "row": 10,
                },
            },
            "dashboard_id": 3,
            "width": 1,
            "id": 12,
        },
    ],
    "dashboard_filters_enabled": False,
}
mock_mysql_data_source_response = {
    "scheduled_queue_name": "scheduled_queries",
    "name": "mysql-rfam-public.ebi.ac.uk",
    "pause_reason": None,
    "queue_name": "queries",
    "syntax": "sql",
    "paused": 0,
    "options": {
        "passwd": "--------",
        "host": "mysql-rfam-public.ebi.ac.uk",
        "db": "Rfam",
        "port": 4497,
        "user": "rfamro",
    },
    "groups": {"2": False},
    "type": "mysql",
    "id": 2,
}
mock_chart_response: Dict[str, Any] = {
    "is_archived": False,
    "updated_at": "2021-08-13T19:10:04.396Z",
    "is_favorite": True,
    "query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
    "id": 4,
    "description": None,
    "tags": [],
    "version": 1,
    "query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
    "api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
    "data_source_id": 2,
    "is_safe": True,
    "latest_query_data_id": 20,
    "schedule": None,
    "user": {
        "auth_type": "password",
        "is_disabled": False,
        "updated_at": "2021-08-13T19:53:44.365Z",
        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
        "is_invitation_pending": False,
        "groups": [1, 2],
        "id": 1,
        "name": "redash",
        "created_at": "2021-08-13T13:39:56.216Z",
        "disabled_at": None,
        "is_email_verified": True,
        "active_at": "2021-08-13T19:53:33Z",
        "email": "redash@example.com",
    },
    "is_draft": False,
    "can_edit": True,
    "name": "My Query",
    "created_at": "2021-08-13T18:57:33.074Z",
    "last_modified_by": {
        "auth_type": "password",
        "is_disabled": False,
        "updated_at": "2021-08-13T19:53:44.365Z",
        "profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
        "is_invitation_pending": False,
        "groups": [1, 2],
        "id": 1,
        "name": "redash",
        "created_at": "2021-08-13T13:39:56.216Z",
        "disabled_at": None,
        "is_email_verified": True,
        "active_at": "2021-08-13T19:53:33Z",
        "email": "redash@example.com",
    },
    "visualizations": [
        {
            "description": "",
            "created_at": "2021-08-13T18:57:33.074Z",
            "updated_at": "2021-08-13T19:13:51.175Z",
            "id": 8,
            "type": "TABLE",
            "options": {},
            "name": "Table",
        },
        {
            "description": "",
            "created_at": "2021-08-13T19:09:11.445Z",
            "updated_at": "2021-08-13T19:13:29.571Z",
            "id": 9,
            "type": "SANKEY",
            "options": {},
            "name": "Sankey",
        },
        {
            "description": "",
            "created_at": "2021-08-13T19:09:55.779Z",
            "updated_at": "2021-08-13T19:13:42.544Z",
            "id": 10,
            "type": "CHART",
            "options": {
                "showDataLabels": True,
                "direction": {"type": "counterclockwise"},
                "missingValuesAsZero": True,
                "error_y": {"visible": True, "type": "data"},
                "numberFormat": "0,0[.]00000",
                "yAxis": [{"type": "linear"}, {"type": "linear", "opposite": True}],
                "series": {
                    "stacking": None,
                    "error_y": {"visible": True, "type": "data"},
                },
                "globalSeriesType": "pie",
                "percentFormat": "0[.]00%",
                "sortX": True,
                "seriesOptions": {
                    "value": {"zIndex": 0, "index": 0, "type": "pie", "yAxis": 0}
                },
                "valuesOptions": {"Yes": {}, "No": {}},
                "xAxis": {"labels": {"enabled": True}, "type": "-"},
                "dateTimeFormat": "DD/MM/YY HH:mm",
                "columnMapping": {"stage1": "x", "value": "y"},
                "textFormat": "",
                "customCode": "// Available variables are x, ys, element, and Plotly\n// Type console.log(x, ys); for more info about x and ys\n// To plot your graph call Plotly.plot(element, ...)\n// Plotly examples and docs: https://plot.ly/javascript/",
                "legend": {"enabled": True},
            },
            "name": "Chart",
        },
    ],
    "options": {"parameters": []},
}


def redash_source() -> RedashSource:
    return RedashSource(
        ctx=PipelineContext(run_id="redash-source-test"),
        config=RedashConfig(
            connect_uri="http://localhost:5000",
            api_key="REDASH_API_KEY",
        ),
    )


def test_get_dashboard_snapshot():
    expected = DashboardSnapshot(
        urn="urn:li:dashboard:(redash,3)",
        aspects=[
            DashboardInfoClass(
                description="My description",
                title="My Dashboard",
                charts=[
                    "urn:li:chart:(redash,10)",
                    "urn:li:chart:(redash,9)",
                    "urn:li:chart:(redash,8)",
                ],
                lastModified=ChangeAuditStamps(
                    created=AuditStamp(
                        time=1628882055288, actor="urn:li:corpuser:unknown"
                    ),
                    lastModified=AuditStamp(
                        time=1628882055288, actor="urn:li:corpuser:unknown"
                    ),
                ),
                dashboardUrl="http://localhost:5000/dashboard/my-dashboard",
                customProperties={},
            )
        ],
    )
    result = redash_source()._get_dashboard_snapshot(mock_dashboard_response)
    assert result == expected


@patch("datahub.ingestion.source.redash.RedashSource._get_chart_data_source")
def test_get_known_viz_chart_snapshot(mocked_data_source):
    mocked_data_source.return_value = mock_mysql_data_source_response
    expected = ChartSnapshot(
        urn="urn:li:chart:(redash,10)",
        aspects=[
            ChartInfoClass(
                customProperties={},
                externalUrl=None,
                title="My Query Chart",
                description="",
                lastModified=ChangeAuditStamps(
                    created=AuditStamp(
                        time=1628882022544, actor="urn:li:corpuser:unknown"
                    ),
                    lastModified=AuditStamp(
                        time=1628882022544, actor="urn:li:corpuser:unknown"
                    ),
                ),
                chartUrl="http://localhost:5000/queries/4#10",
                inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
                type="PIE",
            )
        ],
    )
    viz_data = mock_chart_response.get("visualizations", [])[2]
    result = redash_source()._get_chart_snapshot(mock_chart_response, viz_data)
    assert result == expected


@patch("datahub.ingestion.source.redash.RedashSource._get_chart_data_source")
def test_get_unknown_viz_chart_snapshot(mocked_data_source):
    """
    Testing with unmapped visualization type SANKEY
    """
    mocked_data_source.return_value = mock_mysql_data_source_response
    expected = ChartSnapshot(
        urn="urn:li:chart:(redash,9)",
        aspects=[
            ChartInfoClass(
                customProperties={},
                externalUrl=None,
                title="My Query Sankey",
                description="",
                lastModified=ChangeAuditStamps(
                    created=AuditStamp(
                        time=1628882009571, actor="urn:li:corpuser:unknown"
                    ),
                    lastModified=AuditStamp(
                        time=1628882009571, actor="urn:li:corpuser:unknown"
                    ),
                ),
                chartUrl="http://localhost:5000/queries/4#9",
                inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
                type="TABLE",
            )
        ],
    )
    viz_data = mock_chart_response.get("visualizations", [])[1]
    result = redash_source()._get_chart_snapshot(mock_chart_response, viz_data)
    assert result == expected


# TODO: Getting table lineage from SQL parsing test
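The new tests are self-contained pytest functions built on mocked API responses, so they can be run on their own; for example, from the `metadata-ingestion` directory:

```python
# Run only the new Redash unit tests (invoked from metadata-ingestion/).
import pytest

pytest.main(["tests/unit/test_redash_source.py", "-q"])
```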