datahub/metadata-ingestion/tests/unit/test_redash_source.py

727 lines
28 KiB
Python

from typing import Any, Dict
from unittest.mock import patch
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.source.redash import (
RedashConfig,
RedashSource,
get_full_qualified_name,
)
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
ChartSnapshot,
DashboardSnapshot,
)
from datahub.metadata.schema_classes import ChartInfoClass, DashboardInfoClass
mock_dashboard_response = {
"tags": [],
"is_archived": False,
"updated_at": "2021-08-13T19:14:15.288Z",
"is_favorite": False,
"user": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"layout": [],
"is_draft": False,
"id": 3,
"can_edit": True,
"user_id": 1,
"name": "My Dashboard",
"created_at": "2021-08-13T19:13:07.408Z",
"slug": "my-dashboard",
"version": 2,
"widgets": [
{
"visualization": {
"description": "",
"created_at": "2021-08-13T19:09:55.779Z",
"updated_at": "2021-08-13T19:13:42.544Z",
"id": 10,
"query": {
"user": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"created_at": "2021-08-13T18:57:33.074Z",
"latest_query_data_id": 20,
"schedule": None,
"description": None,
"tags": [],
"updated_at": "2021-08-13T19:10:04.396Z",
"last_modified_by": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"options": {"parameters": []},
"is_safe": True,
"version": 1,
"query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
"is_archived": False,
"query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
"api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
"is_draft": False,
"id": 4,
"data_source_id": 2,
"name": "My Query",
},
"type": "CHART",
"options": {
"showDataLabels": True,
"direction": {"type": "counterclockwise"},
"missingValuesAsZero": True,
"error_y": {"visible": True, "type": "data"},
"numberFormat": "0,0[.]00000",
"yAxis": [{"type": "linear"}, {"type": "linear", "opposite": True}],
"series": {
"stacking": None,
"error_y": {"visible": True, "type": "data"},
},
"globalSeriesType": "pie",
"percentFormat": "0[.]00%",
"sortX": True,
"seriesOptions": {
"value": {"zIndex": 0, "index": 0, "type": "pie", "yAxis": 0}
},
"valuesOptions": {"Yes": {}, "No": {}},
"xAxis": {"labels": {"enabled": True}, "type": "-"},
"dateTimeFormat": "DD/MM/YY HH:mm",
"columnMapping": {"stage1": "x", "value": "y"},
"textFormat": "",
"customCode": "// Available variables are x, ys, element, and Plotly\n// Type console.log(x, ys); for more info about x and ys\n// To plot your graph call Plotly.plot(element, ...)\n// Plotly examples and docs: https://plot.ly/javascript/",
"legend": {"enabled": True},
},
"name": "Chart",
},
"text": "",
"created_at": "2021-08-13T19:13:42.544Z",
"updated_at": "2021-08-13T19:14:11.171Z",
"options": {
"parameterMappings": {},
"isHidden": False,
"position": {
"autoHeight": False,
"sizeX": 3,
"sizeY": 14,
"maxSizeY": 1000,
"maxSizeX": 6,
"minSizeY": 5,
"minSizeX": 1,
"col": 3,
"row": 3,
},
},
"dashboard_id": 3,
"width": 1,
"id": 11,
},
{
"text": "My description",
"created_at": "2021-08-13T19:13:17.453Z",
"updated_at": "2021-08-13T19:13:22.165Z",
"options": {
"position": {
"autoHeight": False,
"sizeX": 6,
"sizeY": 3,
"maxSizeY": 1000,
"maxSizeX": 6,
"minSizeY": 1,
"minSizeX": 1,
"col": 0,
"row": 0,
},
"isHidden": False,
"parameterMappings": {},
},
"dashboard_id": 3,
"width": 1,
"id": 9,
},
{
"visualization": {
"description": "",
"created_at": "2021-08-13T19:09:11.445Z",
"updated_at": "2021-08-13T19:13:29.571Z",
"id": 9,
"query": {
"user": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"created_at": "2021-08-13T18:57:33.074Z",
"latest_query_data_id": 20,
"schedule": None,
"description": None,
"tags": [],
"updated_at": "2021-08-13T19:10:04.396Z",
"last_modified_by": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"options": {"parameters": []},
"is_safe": True,
"version": 1,
"query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
"is_archived": False,
"query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
"api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
"is_draft": False,
"id": 4,
"data_source_id": 2,
"name": "My Query",
},
"type": "SANKEY",
"options": {},
"name": "Sankey",
},
"text": "",
"created_at": "2021-08-13T19:13:29.571Z",
"updated_at": "2021-08-13T19:13:29.665Z",
"options": {
"parameterMappings": {},
"isHidden": False,
"position": {
"autoHeight": False,
"sizeX": 3,
"sizeY": 7,
"maxSizeY": 1000,
"maxSizeX": 6,
"minSizeY": 1,
"minSizeX": 1,
"col": 0,
"row": 3,
},
},
"dashboard_id": 3,
"width": 1,
"id": 10,
},
{
"visualization": {
"description": "",
"created_at": "2021-08-13T18:57:33.074Z",
"updated_at": "2021-08-13T19:13:51.175Z",
"id": 8,
"query": {
"user": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"created_at": "2021-08-13T18:57:33.074Z",
"latest_query_data_id": 20,
"schedule": None,
"description": None,
"tags": [],
"updated_at": "2021-08-13T19:10:04.396Z",
"last_modified_by": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:31:44.116Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:31:24Z",
"email": "redash@example.com",
},
"options": {"parameters": []},
"is_safe": True,
"version": 1,
"query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
"is_archived": False,
"query": "SELECT\nmarried AS stage1, pet as stage2, happy as stage3, freq as value\nFROM (\nSELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,5 AS freq\nUNION ALL SELECT 'Yes' AS married,'Yes' AS pet,'Yes' AS happy,4 AS freq\nUNION ALL SELECT 'Yes' AS married,'No' AS pet,'Yes' AS happy,3 AS freq\nUNION ALL SELECT 'No' AS married,'Yes' AS pet,'Yes' AS happy,2 AS freq\nUNION ALL SELECT 'No' AS married,'No' AS pet,'No' AS happy,1 AS freq\n) t",
"api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
"is_draft": False,
"id": 4,
"data_source_id": 2,
"name": "My Query",
},
"type": "TABLE",
"options": {},
"name": "Table",
},
"text": "",
"created_at": "2021-08-13T19:13:51.175Z",
"updated_at": "2021-08-13T19:14:58.898Z",
"options": {
"parameterMappings": {},
"isHidden": False,
"position": {
"autoHeight": False,
"sizeX": 3,
"sizeY": 7,
"maxSizeY": 1000,
"maxSizeX": 6,
"minSizeY": 1,
"minSizeX": 2,
"col": 0,
"row": 10,
},
},
"dashboard_id": 3,
"width": 1,
"id": 12,
},
],
"dashboard_filters_enabled": False,
}
mock_mysql_data_source_response = {
"scheduled_queue_name": "scheduled_queries",
"name": "mysql-rfam-public.ebi.ac.uk",
"pause_reason": None,
"queue_name": "queries",
"syntax": "sql",
"paused": 0,
"options": {
"passwd": "--------",
"host": "mysql-rfam-public.ebi.ac.uk",
"db": "Rfam",
"port": 4497,
"user": "rfamro",
},
"groups": {"2": False},
"type": "mysql",
"id": 2,
}
mock_chart_response: Dict[str, Any] = {
"is_archived": False,
"updated_at": "2021-08-13T19:10:04.396Z",
"is_favorite": True,
"query": "SELECT\n name,\n SUM(quantity * list_price * (1 - discount)) AS total,\n YEAR(order_date) as order_year\n FROM\n `orders` o\n INNER JOIN `order_items` i ON i.order_id = o.order_id\nINNER JOIN `staffs` s ON s.staff_id = o.staff_id\nGROUP BY\nname,\nyear(order_date)",
"id": 4,
"description": None,
"tags": [],
"version": 1,
"query_hash": "f709ca3a345e6fa2b7d00e005c8c3185",
"api_key": "3MJOZjtshCa2mt3O4x6pzWNKMWcrLIOq5O0u6AVU",
"data_source_id": 2,
"is_safe": True,
"latest_query_data_id": 20,
"schedule": None,
"user": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:53:44.365Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:53:33Z",
"email": "redash@example.com",
},
"is_draft": False,
"can_edit": True,
"name": "My Query",
"created_at": "2021-08-13T18:57:33.074Z",
"last_modified_by": {
"auth_type": "password",
"is_disabled": False,
"updated_at": "2021-08-13T19:53:44.365Z",
"profile_image_url": "https://www.gravatar.com/avatar/db00ae5315ea20071d35b08e959b328e?s=40&d=identicon",
"is_invitation_pending": False,
"groups": [1, 2],
"id": 1,
"name": "redash",
"created_at": "2021-08-13T13:39:56.216Z",
"disabled_at": None,
"is_email_verified": True,
"active_at": "2021-08-13T19:53:33Z",
"email": "redash@example.com",
},
"visualizations": [
{
"description": "",
"created_at": "2021-08-13T18:57:33.074Z",
"updated_at": "2021-08-13T19:13:51.175Z",
"id": 8,
"type": "TABLE",
"options": {},
"name": "Table",
},
{
"description": "",
"created_at": "2021-08-13T19:09:11.445Z",
"updated_at": "2021-08-13T19:13:29.571Z",
"id": 9,
"type": "SANKEY",
"options": {},
"name": "Sankey",
},
{
"description": "",
"created_at": "2021-08-13T19:09:55.779Z",
"updated_at": "2021-08-13T19:13:42.544Z",
"id": 10,
"type": "CHART",
"options": {
"showDataLabels": True,
"direction": {"type": "counterclockwise"},
"missingValuesAsZero": True,
"error_y": {"visible": True, "type": "data"},
"numberFormat": "0,0[.]00000",
"yAxis": [{"type": "linear"}, {"type": "linear", "opposite": True}],
"series": {
"stacking": None,
"error_y": {"visible": True, "type": "data"},
},
"globalSeriesType": "pie",
"percentFormat": "0[.]00%",
"sortX": True,
"seriesOptions": {
"value": {"zIndex": 0, "index": 0, "type": "pie", "yAxis": 0}
},
"valuesOptions": {"Yes": {}, "No": {}},
"xAxis": {"labels": {"enabled": True}, "type": "-"},
"dateTimeFormat": "DD/MM/YY HH:mm",
"columnMapping": {"stage1": "x", "value": "y"},
"textFormat": "",
"customCode": "// Available variables are x, ys, element, and Plotly\n// Type console.log(x, ys); for more info about x and ys\n// To plot your graph call Plotly.plot(element, ...)\n// Plotly examples and docs: https://plot.ly/javascript/",
"legend": {"enabled": True},
},
"name": "Chart",
},
],
"options": {"parameters": []},
}
def redash_source() -> RedashSource:
return RedashSource(
ctx=PipelineContext(run_id="redash-source-test"),
config=RedashConfig(
connect_uri="http://localhost:5000",
api_key="REDASH_API_KEY",
parse_table_names_from_sql=False,
),
)
def test_get_dashboard_snapshot_before_v10():
expected = DashboardSnapshot(
urn="urn:li:dashboard:(redash,3)",
aspects=[
DashboardInfoClass(
description="My description",
title="My Dashboard",
charts=[
"urn:li:chart:(redash,10)",
"urn:li:chart:(redash,9)",
"urn:li:chart:(redash,8)",
],
datasets=[],
lastModified=ChangeAuditStamps(
created=None,
lastModified=AuditStamp(
time=1628882055288, actor="urn:li:corpuser:unknown"
),
),
dashboardUrl="http://localhost:5000/dashboard/my-dashboard",
customProperties={},
)
],
)
result = redash_source()._get_dashboard_snapshot(
mock_dashboard_response, "9.0.0-beta"
)
assert result == expected
def test_get_dashboard_snapshot_after_v10():
expected = DashboardSnapshot(
urn="urn:li:dashboard:(redash,3)",
aspects=[
DashboardInfoClass(
description="My description",
title="My Dashboard",
charts=[
"urn:li:chart:(redash,10)",
"urn:li:chart:(redash,9)",
"urn:li:chart:(redash,8)",
],
datasets=[],
lastModified=ChangeAuditStamps(
created=None,
lastModified=AuditStamp(
time=1628882055288, actor="urn:li:corpuser:unknown"
),
),
dashboardUrl="http://localhost:5000/dashboards/3",
customProperties={},
)
],
)
result = redash_source()._get_dashboard_snapshot(
mock_dashboard_response, "10.0.0-beta"
)
assert result == expected
@patch("datahub.ingestion.source.redash.RedashSource._get_chart_data_source")
def test_get_known_viz_chart_snapshot(mocked_data_source):
mocked_data_source.return_value = mock_mysql_data_source_response
expected = ChartSnapshot(
urn="urn:li:chart:(redash,10)",
aspects=[
ChartInfoClass(
customProperties={},
externalUrl=None,
title="My Query Chart",
description="",
lastModified=ChangeAuditStamps(
created=None,
lastModified=AuditStamp(
time=1628882022544, actor="urn:li:corpuser:unknown"
),
),
chartUrl="http://localhost:5000/queries/4#10",
inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
type="PIE",
)
],
)
viz_data = mock_chart_response.get("visualizations", [])[2]
result = redash_source()._get_chart_snapshot(mock_chart_response, viz_data)
assert result == expected
@patch("datahub.ingestion.source.redash.RedashSource._get_chart_data_source")
def test_get_unknown_viz_chart_snapshot(mocked_data_source):
"""
Testing with unmapped visualization type SANKEY
"""
mocked_data_source.return_value = mock_mysql_data_source_response
expected = ChartSnapshot(
urn="urn:li:chart:(redash,9)",
aspects=[
ChartInfoClass(
customProperties={},
externalUrl=None,
title="My Query Sankey",
description="",
lastModified=ChangeAuditStamps(
created=None,
lastModified=AuditStamp(
time=1628882009571, actor="urn:li:corpuser:unknown"
),
),
chartUrl="http://localhost:5000/queries/4#9",
inputs=["urn:li:dataset:(urn:li:dataPlatform:mysql,Rfam,PROD)"],
type="TABLE",
)
],
)
viz_data = mock_chart_response.get("visualizations", [])[1]
result = redash_source()._get_chart_snapshot(mock_chart_response, viz_data)
assert result == expected
# TODO: Getting table lineage from SQL parsing test
def test_get_full_qualified_name():
test_sql_table_names = [
{
"platform": "postgres",
"database_name": "postgres_db",
"table_name": "orders",
"full_qualified_table_name": "postgres_db.public.orders",
},
{
"platform": "postgres",
"database_name": "postgres_db",
"table_name": "schema.orders",
"full_qualified_table_name": "postgres_db.schema.orders",
},
{
"platform": "postgres",
"database_name": "postgres_db",
"table_name": "other_db.schema.orders",
"full_qualified_table_name": "other_db.schema.orders",
},
{
"platform": "mysql",
"database_name": "mysql_db",
"table_name": "orders",
"full_qualified_table_name": "mysql_db.orders",
},
{
"platform": "mysql",
"database_name": "mysql_db",
"table_name": "other_schema.orders",
"full_qualified_table_name": "other_schema.orders",
},
{
"platform": "bigquery",
"database_name": "projectId",
"table_name": "dataset.table",
"full_qualified_table_name": "projectId.dataset.table",
},
{
"platform": "bigquery",
"database_name": "projectId",
"table_name": "projectIdOther.dataset2.table",
"full_qualified_table_name": "projectIdOther.dataset2.table",
},
{
"platform": "mssql",
"database_name": "AdventureWork",
"table_name": "dbo.Sale Order",
"full_qualified_table_name": "AdventureWork.dbo.Sale Order",
},
{
"platform": "mssql",
"database_name": "AdventureWork",
"table_name": "SaleOrder",
"full_qualified_table_name": "AdventureWork.dbo.SaleOrder",
},
{
"platform": "mssql",
"database_name": "AdventureWork",
"table_name": "OtherDB.dbo.Sale Order",
"full_qualified_table_name": "OtherDB.dbo.Sale Order",
},
]
expected = list()
result = list()
for sql_table_name in test_sql_table_names:
platform = sql_table_name["platform"]
database_name = sql_table_name["database_name"]
table_name = sql_table_name["table_name"]
expected.append(sql_table_name["full_qualified_table_name"])
result.append(
get_full_qualified_name(
platform=platform, database_name=database_name, table_name=table_name
)
)
assert expected == result
def redash_source_parse_table_names_from_sql() -> RedashSource:
return RedashSource(
ctx=PipelineContext(run_id="redash-source-test"),
config=RedashConfig(
connect_uri="http://localhost:5000",
api_key="REDASH_API_KEY",
parse_table_names_from_sql=True,
),
)
@patch("datahub.ingestion.source.redash.RedashSource._get_chart_data_source")
def test_get_chart_snapshot_parse_table_names_from_sql(mocked_data_source):
mocked_data_source.return_value = mock_mysql_data_source_response
expected = ChartSnapshot(
urn="urn:li:chart:(redash,10)",
aspects=[
ChartInfoClass(
customProperties={},
externalUrl=None,
title="My Query Chart",
description="",
lastModified=ChangeAuditStamps(
created=None,
lastModified=AuditStamp(
time=1628882022544, actor="urn:li:corpuser:unknown"
),
),
chartUrl="http://localhost:5000/queries/4#10",
inputs=[
"urn:li:dataset:(urn:li:dataPlatform:mysql,rfam.order_items,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:mysql,rfam.orders,PROD)",
"urn:li:dataset:(urn:li:dataPlatform:mysql,rfam.staffs,PROD)",
],
type="PIE",
)
],
)
viz_data = mock_chart_response.get("visualizations", [])[2]
result = redash_source_parse_table_names_from_sql()._get_chart_snapshot(
mock_chart_response, viz_data
)
assert result == expected