mirror of
https://github.com/datahub-project/datahub.git
synced 2025-09-01 05:13:15 +00:00
fix(ingestion): Tableau test case execution fix (#6005)
This commit is contained in:
parent
1d45b81748
commit
c062bf2201
@ -22236,7 +22236,7 @@
|
|||||||
"changeType": "UPSERT",
|
"changeType": "UPSERT",
|
||||||
"aspectName": "upstreamLineage",
|
"aspectName": "upstreamLineage",
|
||||||
"aspect": {
|
"aspect": {
|
||||||
"value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.people,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.returns,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.orders,PROD)\", \"type\": \"TRANSFORMED\"}]}",
|
"value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.returns,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.orders,PROD)\", \"type\": \"TRANSFORMED\"}]}",
|
||||||
"contentType": "application/json"
|
"contentType": "application/json"
|
||||||
},
|
},
|
||||||
"systemMetadata": {
|
"systemMetadata": {
|
||||||
@ -31694,7 +31694,7 @@
|
|||||||
{
|
{
|
||||||
"proposedSnapshot": {
|
"proposedSnapshot": {
|
||||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.people,PROD)",
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)",
|
||||||
"aspects": [
|
"aspects": [
|
||||||
{
|
{
|
||||||
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||||
@ -31763,7 +31763,7 @@
|
|||||||
{
|
{
|
||||||
"proposedSnapshot": {
|
"proposedSnapshot": {
|
||||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.returns,PROD)",
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.returns,PROD)",
|
||||||
"aspects": [
|
"aspects": [
|
||||||
{
|
{
|
||||||
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||||
@ -31832,7 +31832,7 @@
|
|||||||
{
|
{
|
||||||
"proposedSnapshot": {
|
"proposedSnapshot": {
|
||||||
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
|
||||||
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.orders,PROD)",
|
"urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.orders,PROD)",
|
||||||
"aspects": [
|
"aspects": [
|
||||||
{
|
{
|
||||||
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
"com.linkedin.pegasus2avro.common.BrowsePaths": {
|
||||||
|
@ -1,134 +0,0 @@
|
|||||||
import json
|
|
||||||
import pathlib
|
|
||||||
from unittest import mock
|
|
||||||
|
|
||||||
from tableauserverclient.models import ViewItem
|
|
||||||
|
|
||||||
from datahub.ingestion.run.pipeline import Pipeline
|
|
||||||
from tests.test_helpers import mce_helpers
|
|
||||||
|
|
||||||
FROZEN_TIME = "2021-12-07 07:00:00"
|
|
||||||
|
|
||||||
test_resources_dir = None
|
|
||||||
|
|
||||||
|
|
||||||
def _read_response(file_name):
|
|
||||||
response_json_path = f"{test_resources_dir}/setup/{file_name}"
|
|
||||||
with open(response_json_path) as file:
|
|
||||||
data = json.loads(file.read())
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def define_query_metadata_func(workbook_0: str, workbook_all: str): # type: ignore
|
|
||||||
def side_effect_query_metadata(query):
|
|
||||||
|
|
||||||
if "workbooksConnection (first:0" in query:
|
|
||||||
return _read_response(workbook_0)
|
|
||||||
|
|
||||||
if "workbooksConnection (first:3" in query:
|
|
||||||
return _read_response(workbook_all)
|
|
||||||
|
|
||||||
if "embeddedDatasourcesConnection (first:0" in query:
|
|
||||||
return _read_response("embeddedDatasourcesConnection_0.json")
|
|
||||||
|
|
||||||
if "embeddedDatasourcesConnection (first:8" in query:
|
|
||||||
return _read_response("embeddedDatasourcesConnection_all.json")
|
|
||||||
|
|
||||||
if "publishedDatasourcesConnection (first:0" in query:
|
|
||||||
return _read_response("publishedDatasourcesConnection_0.json")
|
|
||||||
|
|
||||||
if "publishedDatasourcesConnection (first:2" in query:
|
|
||||||
return _read_response("publishedDatasourcesConnection_all.json")
|
|
||||||
|
|
||||||
if "customSQLTablesConnection (first:0" in query:
|
|
||||||
return _read_response("customSQLTablesConnection_0.json")
|
|
||||||
|
|
||||||
if "customSQLTablesConnection (first:2" in query:
|
|
||||||
return _read_response("customSQLTablesConnection_all.json")
|
|
||||||
|
|
||||||
return side_effect_query_metadata
|
|
||||||
|
|
||||||
|
|
||||||
def side_effect_usage_stat(*arg, **kwargs):
|
|
||||||
mock_pagination = mock.MagicMock()
|
|
||||||
mock_pagination.total_available = None
|
|
||||||
|
|
||||||
dashboard_stat: ViewItem = ViewItem()
|
|
||||||
|
|
||||||
# Added as luid of Dashboard in workbooksConnection_state_all.json
|
|
||||||
dashboard_stat._id = "fc9ea488-f810-4fa8-ac19-aa96018b5d66"
|
|
||||||
dashboard_stat._total_views = 3
|
|
||||||
|
|
||||||
# Added as luid of Sheet in workbooksConnection_state_all.json
|
|
||||||
sheet_stat: ViewItem = ViewItem()
|
|
||||||
sheet_stat._id = "f0779f9d-6765-47a9-a8f6-c740cfd27783"
|
|
||||||
sheet_stat._total_views = 5
|
|
||||||
|
|
||||||
return [dashboard_stat, sheet_stat], mock_pagination
|
|
||||||
|
|
||||||
|
|
||||||
def tableau_ingest_common(
|
|
||||||
pytestconfig,
|
|
||||||
tmp_path,
|
|
||||||
side_effect_query_metadata_func,
|
|
||||||
golden_file_name,
|
|
||||||
output_file_name,
|
|
||||||
):
|
|
||||||
global test_resources_dir
|
|
||||||
test_resources_dir = pathlib.Path(
|
|
||||||
pytestconfig.rootpath / "tests/integration/tableau"
|
|
||||||
)
|
|
||||||
|
|
||||||
with mock.patch("tableauserverclient.Server") as mock_sdk:
|
|
||||||
mock_client = mock.Mock()
|
|
||||||
mocked_metadata = mock.Mock()
|
|
||||||
mocked_metadata.query.side_effect = side_effect_query_metadata_func
|
|
||||||
mock_client.metadata = mocked_metadata
|
|
||||||
mock_client.auth = mock.Mock()
|
|
||||||
mock_client.views = mock.Mock()
|
|
||||||
mock_client.views.get.side_effect = side_effect_usage_stat
|
|
||||||
mock_client.auth.sign_in.return_value = None
|
|
||||||
mock_client.auth.sign_out.return_value = None
|
|
||||||
mock_sdk.return_value = mock_client
|
|
||||||
mock_sdk._auth_token = "ABC"
|
|
||||||
|
|
||||||
pipeline = Pipeline.create(
|
|
||||||
{
|
|
||||||
"run_id": "tableau-test",
|
|
||||||
"source": {
|
|
||||||
"type": "tableau",
|
|
||||||
"config": {
|
|
||||||
"username": "username",
|
|
||||||
"password": "pass`",
|
|
||||||
"connect_uri": "https://do-not-connect",
|
|
||||||
"site": "acryl",
|
|
||||||
"projects": ["default", "Project 2"],
|
|
||||||
"page_size": 10,
|
|
||||||
"ingest_tags": True,
|
|
||||||
"ingest_owner": True,
|
|
||||||
"ingest_tables_external": True,
|
|
||||||
"default_schema_map": {
|
|
||||||
"dvdrental": "public",
|
|
||||||
"someotherdb": "schema",
|
|
||||||
},
|
|
||||||
"platform_instance_map": {"postgres": "demo_postgres_instance"},
|
|
||||||
"extract_usage_stats": True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"sink": {
|
|
||||||
"type": "file",
|
|
||||||
"config": {
|
|
||||||
"filename": f"{tmp_path}/{output_file_name}",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
pipeline.run()
|
|
||||||
pipeline.raise_from_status()
|
|
||||||
|
|
||||||
mce_helpers.check_golden_file(
|
|
||||||
pytestconfig,
|
|
||||||
output_path=f"{tmp_path}/{output_file_name}",
|
|
||||||
golden_path=test_resources_dir / golden_file_name,
|
|
||||||
ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,
|
|
||||||
)
|
|
@ -1,27 +1,154 @@
|
|||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import test_tableau_common
|
|
||||||
from freezegun import freeze_time
|
from freezegun import freeze_time
|
||||||
|
from tableauserverclient.models import ViewItem
|
||||||
|
|
||||||
from datahub.configuration.source_common import DEFAULT_ENV
|
from datahub.configuration.source_common import DEFAULT_ENV
|
||||||
|
from datahub.ingestion.run.pipeline import Pipeline
|
||||||
from datahub.ingestion.source.tableau_common import (
|
from datahub.ingestion.source.tableau_common import (
|
||||||
TableauLineageOverrides,
|
TableauLineageOverrides,
|
||||||
make_table_urn,
|
make_table_urn,
|
||||||
)
|
)
|
||||||
|
from tests.test_helpers import mce_helpers
|
||||||
|
|
||||||
FROZEN_TIME = "2021-12-07 07:00:00"
|
FROZEN_TIME = "2021-12-07 07:00:00"
|
||||||
|
|
||||||
test_resources_dir = None
|
test_resources_dir = None
|
||||||
|
|
||||||
|
|
||||||
|
def _read_response(file_name):
|
||||||
|
response_json_path = f"{test_resources_dir}/setup/{file_name}"
|
||||||
|
with open(response_json_path) as file:
|
||||||
|
data = json.loads(file.read())
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def define_query_metadata_func(workbook_0: str, workbook_all: str): # type: ignore
|
||||||
|
def side_effect_query_metadata(query):
|
||||||
|
if "workbooksConnection (first:0" in query:
|
||||||
|
return _read_response(workbook_0)
|
||||||
|
|
||||||
|
if "workbooksConnection (first:3" in query:
|
||||||
|
return _read_response(workbook_all)
|
||||||
|
|
||||||
|
if "embeddedDatasourcesConnection (first:0" in query:
|
||||||
|
return _read_response("embeddedDatasourcesConnection_0.json")
|
||||||
|
|
||||||
|
if "embeddedDatasourcesConnection (first:8" in query:
|
||||||
|
return _read_response("embeddedDatasourcesConnection_all.json")
|
||||||
|
|
||||||
|
if "publishedDatasourcesConnection (first:0" in query:
|
||||||
|
return _read_response("publishedDatasourcesConnection_0.json")
|
||||||
|
|
||||||
|
if "publishedDatasourcesConnection (first:2" in query:
|
||||||
|
return _read_response("publishedDatasourcesConnection_all.json")
|
||||||
|
|
||||||
|
if "customSQLTablesConnection (first:0" in query:
|
||||||
|
return _read_response("customSQLTablesConnection_0.json")
|
||||||
|
|
||||||
|
if "customSQLTablesConnection (first:2" in query:
|
||||||
|
return _read_response("customSQLTablesConnection_all.json")
|
||||||
|
|
||||||
|
return side_effect_query_metadata
|
||||||
|
|
||||||
|
|
||||||
|
def side_effect_usage_stat(*arg, **kwargs):
|
||||||
|
mock_pagination = mock.MagicMock()
|
||||||
|
mock_pagination.total_available = None
|
||||||
|
|
||||||
|
dashboard_stat: ViewItem = ViewItem()
|
||||||
|
|
||||||
|
# Added as luid of Dashboard in workbooksConnection_state_all.json
|
||||||
|
dashboard_stat._id = "fc9ea488-f810-4fa8-ac19-aa96018b5d66"
|
||||||
|
dashboard_stat._total_views = 3
|
||||||
|
|
||||||
|
# Added as luid of Sheet in workbooksConnection_state_all.json
|
||||||
|
sheet_stat: ViewItem = ViewItem()
|
||||||
|
sheet_stat._id = "f0779f9d-6765-47a9-a8f6-c740cfd27783"
|
||||||
|
sheet_stat._total_views = 5
|
||||||
|
|
||||||
|
return [dashboard_stat, sheet_stat], mock_pagination
|
||||||
|
|
||||||
|
|
||||||
|
def tableau_ingest_common(
|
||||||
|
pytestconfig,
|
||||||
|
tmp_path,
|
||||||
|
side_effect_query_metadata_func,
|
||||||
|
golden_file_name,
|
||||||
|
output_file_name,
|
||||||
|
):
|
||||||
|
global test_resources_dir
|
||||||
|
test_resources_dir = pathlib.Path(
|
||||||
|
pytestconfig.rootpath / "tests/integration/tableau"
|
||||||
|
)
|
||||||
|
|
||||||
|
with mock.patch("datahub.ingestion.source.tableau.Server") as mock_sdk:
|
||||||
|
mock_client = mock.Mock()
|
||||||
|
mocked_metadata = mock.Mock()
|
||||||
|
mocked_metadata.query.side_effect = side_effect_query_metadata_func
|
||||||
|
mock_client.metadata = mocked_metadata
|
||||||
|
mock_client.auth = mock.Mock()
|
||||||
|
mock_client.views = mock.Mock()
|
||||||
|
mock_client.views.get.side_effect = side_effect_usage_stat
|
||||||
|
mock_client.auth.sign_in.return_value = None
|
||||||
|
mock_client.auth.sign_out.return_value = None
|
||||||
|
mock_sdk.return_value = mock_client
|
||||||
|
mock_sdk._auth_token = "ABC"
|
||||||
|
|
||||||
|
pipeline = Pipeline.create(
|
||||||
|
{
|
||||||
|
"run_id": "tableau-test",
|
||||||
|
"source": {
|
||||||
|
"type": "tableau",
|
||||||
|
"config": {
|
||||||
|
"username": "username",
|
||||||
|
"password": "pass`",
|
||||||
|
"connect_uri": "https://do-not-connect",
|
||||||
|
"site": "acryl",
|
||||||
|
"projects": ["default", "Project 2"],
|
||||||
|
"page_size": 10,
|
||||||
|
"ingest_tags": True,
|
||||||
|
"ingest_owner": True,
|
||||||
|
"ingest_tables_external": True,
|
||||||
|
"default_schema_map": {
|
||||||
|
"dvdrental": "public",
|
||||||
|
"someotherdb": "schema",
|
||||||
|
},
|
||||||
|
"platform_instance_map": {"postgres": "demo_postgres_instance"},
|
||||||
|
"extract_usage_stats": True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "file",
|
||||||
|
"config": {
|
||||||
|
"filename": f"{tmp_path}/{output_file_name}",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
pipeline.run()
|
||||||
|
pipeline.raise_from_status()
|
||||||
|
|
||||||
|
mce_helpers.check_golden_file(
|
||||||
|
pytestconfig,
|
||||||
|
output_path=f"{tmp_path}/{output_file_name}",
|
||||||
|
golden_path=test_resources_dir / golden_file_name,
|
||||||
|
ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@freeze_time(FROZEN_TIME)
|
@freeze_time(FROZEN_TIME)
|
||||||
@pytest.mark.slow_unit
|
@pytest.mark.slow_unit
|
||||||
def test_tableau_ingest(pytestconfig, tmp_path):
|
def test_tableau_ingest(pytestconfig, tmp_path):
|
||||||
output_file_name: str = "tableau_mces.json"
|
output_file_name: str = "tableau_mces.json"
|
||||||
golden_file_name: str = "tableau_mces_golden.json"
|
golden_file_name: str = "tableau_mces_golden.json"
|
||||||
side_effect_query_metadata = test_tableau_common.define_query_metadata_func(
|
side_effect_query_metadata = define_query_metadata_func(
|
||||||
"workbooksConnection_0.json", "workbooksConnection_all.json"
|
"workbooksConnection_0.json", "workbooksConnection_all.json"
|
||||||
)
|
)
|
||||||
test_tableau_common.tableau_ingest_common(
|
tableau_ingest_common(
|
||||||
pytestconfig,
|
pytestconfig,
|
||||||
tmp_path,
|
tmp_path,
|
||||||
side_effect_query_metadata,
|
side_effect_query_metadata,
|
||||||
@ -30,6 +157,23 @@ def test_tableau_ingest(pytestconfig, tmp_path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@freeze_time(FROZEN_TIME)
|
||||||
|
@pytest.mark.slow_unit
|
||||||
|
def test_tableau_usage_stat(pytestconfig, tmp_path):
|
||||||
|
output_file_name: str = "tableau_stat_mces.json"
|
||||||
|
golden_file_name: str = "tableau_state_mces_golden.json"
|
||||||
|
func = define_query_metadata_func(
|
||||||
|
"workbooksConnection_0.json", "workbooksConnection_state_all.json"
|
||||||
|
)
|
||||||
|
tableau_ingest_common(
|
||||||
|
pytestconfig,
|
||||||
|
tmp_path,
|
||||||
|
func,
|
||||||
|
golden_file_name,
|
||||||
|
output_file_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_lineage_overrides():
|
def test_lineage_overrides():
|
||||||
# Simple - specify platform instance to presto table
|
# Simple - specify platform instance to presto table
|
||||||
assert (
|
assert (
|
||||||
|
Loading…
x
Reference in New Issue
Block a user