OpenMetadata/ingestion/tests/cli_e2e/test_cli_tableau.py
2025-09-10 01:52:34 +05:30

636 lines
23 KiB
Python

# Copyright 2022 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Tableau connector with CLI - Enhanced with comprehensive lineage and metadata testing
"""
from pathlib import Path
from typing import List, Optional, Sequence

import pytest

from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.dashboardDataModel import DashboardDataModel
from metadata.ingestion.api.status import Status

from .base.test_cli import PATH_TO_RESOURCES
from .common.test_cli_dashboard import CliCommonDashboard
class TableauExpectedValues:
    """
    Centralized expected values for Tableau testing based on actual test data.

    Every attribute is a plain class-level constant; the class is never
    instantiated and only serves as a namespace for the test suite.
    """

    # Basic counts - based on the actual test knowledge base
    DASHBOARDS_AND_CHARTS = 5  # 1 dashboard + 4 charts = 5
    LINEAGE_EDGES = 9
    TAGS = 2
    DATAMODEL_LINEAGE = 0
    DATAMODELS = 2
    FILTERED_MIX = 2
    FILTERED_SINK_MIX = 2
    DASHBOARDS_AND_CHARTS_AFTER_PATCH = 5

    # Name of the dashboard service the entities are listed from
    SERVICE_NAME = "local_tableau"

    # Expected entity names
    EXPECTED_DASHBOARD_NAMES = ["Analytics Workbook"]
    EXPECTED_CHART_NAMES = [
        "Product Measure Sheet",
        "Sales Story",
        "Product vs Category Dashboard",
        "Category Measure Sheet",
    ]
    EXPECTED_DATAMODEL_NAMES = [
        # Appears in both TableauEmbeddedDatasource and TableauPublishedDatasource
        "Sales Summary"
    ]

    # Expected data model columns/fields
    EXPECTED_DATAMODEL_FIELDS = [
        "state",
        "category_name",
        "order_date",
        "product_id",
        "customer_id",
        "line_total",
        "region",
        "product_name",
        "customer_name",
        "Sales Summary (Custom SQL)",
        "price_at_purchase",
        "order_id",
        "quantity",
    ]

    # Expected tags
    EXPECTED_TAGS = ["Analytics", "workbook"]

    # Expected chart type (string form of the chart type enum member)
    EXPECTED_CHART_TYPE = "ChartType.Other"

    # Expected field type
    EXPECTED_FIELD_TYPE = "tableau field"

    # Expected data model types (string form of the data model type enum members)
    EXPECTED_DATAMODEL_TYPES = [
        "DataModelType.TableauEmbeddedDatasource",
        "DataModelType.TableauPublishedDatasource",
    ]

    # Expected SQL for data models. Consumers compare it after collapsing
    # whitespace, so the line layout inside the literal is not significant.
    EXPECTED_DATAMODEL_SQL = """SELECT
o.order_id,
o.order_date,
c.customer_id,
c.first_name || ' ' || c.last_name AS customer_name,
c.state,
c.region,
p.product_id,
p.product_name,
cat.category_name,
oi.quantity,
oi.price_at_purchase,
(oi.quantity * oi.price_at_purchase) AS line_total
FROM
sales.orders AS o
JOIN
sales.customers AS c ON o.customer_id = c.customer_id
JOIN
sales.order_items AS oi ON o.order_id = oi.order_id
JOIN
inventory.products AS p ON oi.product_id = p.product_id
JOIN
inventory.categories AS cat ON p.category_id = cat.category_id"""

    # Lineage expectations:
    # Tables -> TableauPublishedDatasource -> TableauEmbeddedDatasource -> Dashboard
    EXPECTED_SOURCE_TABLES = [
        "categories",
        "customers",
        "order_items",
        "orders",
        "products",
    ]

    # Filter patterns (regular expressions handed to the ingestion filters)
    INCLUDE_DASHBOARDS = [".*Analytics.*"]
    EXCLUDE_DASHBOARDS = ["Sample.*"]
    INCLUDE_CHARTS = [".*Sheet.*", ".*Product.*", ".*Sales.*"]
    EXCLUDE_CHARTS = ["Obesity"]
    INCLUDE_DATAMODELS = [".*Sales.*", ".*Summary.*"]
    EXCLUDE_DATAMODELS = ["Random.*"]
class TableauCliTest(CliCommonDashboard.TestSuite):
    """
    Enhanced Tableau CLI Test Suite with comprehensive lineage and metadata validation.

    Supplies the Tableau-specific filter patterns and expected counts to the
    shared dashboard suite, and adds ordered tests that inspect the ingested
    dashboards, charts, data models, tags and lineage through ``self.openmetadata``.
    """

    # Name of the dashboard and data model that most validations focus on.
    _ANALYTICS_DASHBOARD = "Analytics Workbook"
    _SALES_SUMMARY = "Sales Summary"

    def prepare(self) -> None:
        """Prepare test environment by setting up required database service.

        Runs the CLI against ``dashboard/<connector>/redshift.yaml`` found under
        ``PATH_TO_RESOURCES``.
        """
        redshift_file_path = (
            Path(PATH_TO_RESOURCES)
            / "dashboard"
            / self.get_connector_name()
            / "redshift.yaml"
        )
        self.run_command(test_file_path=str(redshift_file_path))

    @staticmethod
    def get_connector_name() -> str:
        """Return the connector name used to locate the test resources."""
        return "tableau"

    # ================================
    # FILTER CONFIGURATION METHODS
    # ================================
    def get_includes_dashboards(self) -> List[str]:
        """Return the dashboard include patterns."""
        return TableauExpectedValues.INCLUDE_DASHBOARDS

    def get_excludes_dashboards(self) -> List[str]:
        """Return the dashboard exclude patterns."""
        return TableauExpectedValues.EXCLUDE_DASHBOARDS

    def get_includes_charts(self) -> List[str]:
        """Return the chart include patterns."""
        return TableauExpectedValues.INCLUDE_CHARTS

    def get_excludes_charts(self) -> List[str]:
        """Return the chart exclude patterns."""
        return TableauExpectedValues.EXCLUDE_CHARTS

    def get_includes_datamodels(self) -> List[str]:
        """Return the data model include patterns."""
        return TableauExpectedValues.INCLUDE_DATAMODELS

    def get_excludes_datamodels(self) -> List[str]:
        """Return the data model exclude patterns."""
        return TableauExpectedValues.EXCLUDE_DATAMODELS

    # ================================
    # EXPECTED COUNT METHODS
    # ================================
    def expected_dashboards_and_charts(self) -> int:
        """Return the expected number of dashboards plus charts."""
        return TableauExpectedValues.DASHBOARDS_AND_CHARTS

    def expected_lineage(self) -> int:
        """Return the expected number of lineage edges."""
        return TableauExpectedValues.LINEAGE_EDGES

    def expected_tags(self) -> int:
        """Return the expected number of tags."""
        return TableauExpectedValues.TAGS

    def expected_datamodel_lineage(self) -> int:
        """Return the expected number of data model lineage records."""
        return TableauExpectedValues.DATAMODEL_LINEAGE

    def expected_datamodels(self) -> int:
        """Return the expected number of data models."""
        return TableauExpectedValues.DATAMODELS

    def expected_filtered_mix(self) -> int:
        """Return the expected number of filtered entities for the mixed filter run."""
        return TableauExpectedValues.FILTERED_MIX

    def expected_filtered_sink_mix(self) -> int:
        """Return the expected sink-side count for the mixed filter run."""
        return TableauExpectedValues.FILTERED_SINK_MIX

    def expected_dashboards_and_charts_after_patch(self) -> int:
        """Return the expected number of dashboards plus charts after a patch run."""
        return TableauExpectedValues.DASHBOARDS_AND_CHARTS_AFTER_PATCH

    # ================================
    # ENHANCED TEST METHODS
    # ================================
    @pytest.mark.order(10)
    def test_metadata_ingestion_validation(self) -> None:
        """Test comprehensive metadata ingestion validation - runs after base ingestion"""
        self._validate_dashboard_metadata()
        self._validate_chart_metadata()
        self._validate_datamodel_metadata()
        self._validate_tags_metadata()

    @pytest.mark.order(11)
    def test_lineage(self) -> None:
        """Validate dashboard lineage and the data model lineage chain.

        NOTE(review): runs the same checks as ``test_lineage_validation``;
        presumably this one exists to replace a skipped test of the same name
        in the base suite - confirm, then drop one of the two.
        """
        self._validate_dashboard_lineage()
        self._validate_datamodel_lineage_chain()

    @pytest.mark.order(12)
    def test_lineage_validation(self) -> None:
        """Test comprehensive lineage validation"""
        self._validate_dashboard_lineage()
        self._validate_datamodel_lineage_chain()

    @pytest.mark.order(13)
    def test_datamodel_content_validation(self) -> None:
        """Test data model content and structure validation"""
        self._validate_datamodel_fields()
        self._validate_datamodel_sql()
        self._validate_datamodel_types()

    @pytest.mark.order(14)
    def test_tags_and_metadata_quality(self) -> None:
        """Test tags and metadata quality"""
        self._validate_tag_assignment()
        self._validate_chart_types()
        self._validate_field_types()

    # ================================
    # INTERNAL HELPERS
    # ================================
    @staticmethod
    def _as_text(value) -> Optional[str]:
        """Return ``value.root`` when present, ``str(value)`` otherwise, ``None`` if falsy."""
        if not value:
            return None
        return value.root if hasattr(value, "root") else str(value)

    def _list_service_entities(
        self, entity_type, fields: Optional[Sequence[str]] = None
    ) -> list:
        """List all entities of ``entity_type`` in the Tableau test service.

        ``fields`` is only forwarded when given, so calls without it hit
        ``list_entities`` exactly as a call that omits the argument would.
        """
        extra = {} if fields is None else {"fields": list(fields)}
        return self.openmetadata.list_entities(
            entity=entity_type,
            params={"service": TableauExpectedValues.SERVICE_NAME},
            **extra,
        ).entities

    def _get_sales_summary_datamodels(self) -> list:
        """Return the data models whose name or displayName is "Sales Summary".

        Both attributes are checked (as ``get_entity_by_name`` does) so every
        validation selects the same models regardless of which attribute the
        connector stores the human-readable name in.
        """
        return [
            datamodel
            for datamodel in self._list_service_entities(DashboardDataModel)
            if self._SALES_SUMMARY
            in (
                self._as_text(datamodel.name),
                self._as_text(getattr(datamodel, "displayName", None)),
            )
        ]

    def _assert_expected_datamodel_types(self, datamodels) -> None:
        """Assert every expected data model type occurs among ``datamodels``."""
        found_types = {
            str(datamodel.dataModelType)
            for datamodel in datamodels
            if getattr(datamodel, "dataModelType", None)
        }
        for expected_type in TableauExpectedValues.EXPECTED_DATAMODEL_TYPES:
            self.assertIn(
                expected_type,
                found_types,
                f"Expected data model type '{expected_type}' not found",
            )

    # ================================
    # VALIDATIONS
    # ================================
    def _validate_dashboard_metadata(self) -> None:
        """Validate dashboard metadata completeness"""
        dashboards = self._list_service_entities(Dashboard)
        self.assertGreaterEqual(
            len(dashboards), len(TableauExpectedValues.EXPECTED_DASHBOARD_NAMES)
        )
        dashboard_names = [dashboard.displayName for dashboard in dashboards]
        for expected_name in TableauExpectedValues.EXPECTED_DASHBOARD_NAMES:
            self.assertIn(
                expected_name,
                dashboard_names,
                f"Expected dashboard '{expected_name}' not found in ingested dashboards",
            )

        # Fail loudly when the dashboard is missing instead of skipping the checks.
        analytics_dashboard = self.validate_entity_exists(
            Dashboard, self._ANALYTICS_DASHBOARD
        )
        self.assertIsNotNone(analytics_dashboard.fullyQualifiedName)
        self.assertIsNotNone(analytics_dashboard.service)

        # Validate that charts are properly linked
        linked_charts = getattr(analytics_dashboard, "charts", None)
        if linked_charts:
            self.assertGreaterEqual(
                len(linked_charts.root),
                4,
                "Analytics Workbook should have 4 or more charts",
            )

    def _validate_chart_metadata(self) -> None:
        """Validate chart metadata completeness"""
        charts = self._list_service_entities(Chart)
        self.assertGreaterEqual(
            len(charts), len(TableauExpectedValues.EXPECTED_CHART_NAMES)
        )
        chart_names = [chart.displayName for chart in charts]
        for expected_name in TableauExpectedValues.EXPECTED_CHART_NAMES:
            self.assertIn(
                expected_name,
                chart_names,
                f"Expected chart '{expected_name}' not found in ingested charts",
            )

    def _validate_datamodel_metadata(self) -> None:
        """Validate data model metadata completeness"""
        datamodels = self._list_service_entities(DashboardDataModel)

        # Should have at least one "Sales Summary" data model
        datamodel_names = [datamodel.displayName for datamodel in datamodels]
        self.assertIn(
            "Sales Summary", datamodel_names, "Sales Summary data model not found"
        )

        self._assert_expected_datamodel_types(datamodels)

    def _validate_tags_metadata(self) -> None:
        """Validate that every expected tag is attached to some dashboard or chart"""
        tagged_entities = []
        tagged_entities.extend(self._list_service_entities(Dashboard, fields=["tags"]))
        tagged_entities.extend(self._list_service_entities(Chart, fields=["tags"]))

        found_tags = set()
        for entity in tagged_entities:
            for tag in getattr(entity, "tags", None) or []:
                tag_name = getattr(tag, "name", None)
                if tag_name is not None:
                    found_tags.add(str(tag_name))

        for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
            self.assertIn(
                expected_tag,
                found_tags,
                f"Expected tag '{expected_tag}' not found in any entity",
            )

    def _validate_dashboard_lineage(self) -> None:
        """Validate dashboard lineage according to the knowledge base"""
        # Lineage chain:
        # Tables -> TableauPublishedDatasource -> TableauEmbeddedDatasource -> Dashboard
        analytics_dashboard = self.validate_entity_exists(
            Dashboard, self._ANALYTICS_DASHBOARD
        )
        lineage = self.openmetadata.get_lineage_by_name(
            entity=Dashboard,
            fqn=analytics_dashboard.fullyQualifiedName.root,
            up_depth=5,  # Increased depth to capture full lineage chain
            down_depth=1,
        )
        # NOTE(review): when no lineage (or no upstream edges) comes back the
        # check is skipped rather than failed - confirm this is intentional.
        upstream_edges = (lineage or {}).get("upstreamEdges")
        if upstream_edges:
            self.assertGreater(
                len(upstream_edges),
                6,
                "Analytics Workbook should have upstream lineage",
            )

    def _validate_datamodel_lineage_chain(self) -> None:
        """Validate the SQL recorded on the upstream edges of the Sales Summary models"""
        # Compare with whitespace collapsed so formatting differences don't matter.
        expected_sql = " ".join(TableauExpectedValues.EXPECTED_DATAMODEL_SQL.split())

        for datamodel in self._get_sales_summary_datamodels():
            lineage = self.openmetadata.get_lineage_by_name(
                entity=DashboardDataModel,
                fqn=datamodel.fullyQualifiedName.root,
                up_depth=3,
                down_depth=3,
            )
            for edge in (lineage or {}).get("upstreamEdges") or []:
                # Edges without lineage details or without a SQL query are skipped.
                lineage_query = (edge.get("lineageDetails") or {}).get("sqlQuery")
                if not lineage_query:
                    continue
                self.assertEqual(
                    " ".join(lineage_query.split()),
                    expected_sql,
                    "Lineage SQL query doesn't match expected SQL query",
                )

    def _validate_datamodel_fields(self) -> None:
        """Validate data model fields/columns"""
        expected_fields = set(TableauExpectedValues.EXPECTED_DATAMODEL_FIELDS)

        for datamodel in self._get_sales_summary_datamodels():
            columns = getattr(datamodel, "columns", None)
            if not columns:
                continue

            # Accept a match on either the column name or its displayName.
            column_labels = set()
            for column in columns:
                column_labels.add(self._as_text(column.name))
                column_labels.add(self._as_text(getattr(column, "displayName", None)))

            found_fields = len(expected_fields & column_labels)
            self.assertGreater(
                found_fields,
                5,  # Should find more than 5 expected fields
                f"Data model {datamodel.name.root} should contain expected fields",
            )

    def _validate_datamodel_sql(self) -> None:
        """Validate data model SQL content"""
        for datamodel in self._get_sales_summary_datamodels():
            sql_content = self._as_text(getattr(datamodel, "sql", None))
            if not sql_content:
                continue

            sql_upper = sql_content.upper()
            sql_lower = sql_content.lower()

            # Check for key SQL elements
            self.assertIn("SELECT", sql_upper, "SQL should contain SELECT statement")
            self.assertIn("JOIN", sql_upper, "SQL should contain JOIN statements")

            # Check for expected table references
            for table in TableauExpectedValues.EXPECTED_SOURCE_TABLES:
                self.assertIn(
                    table,
                    sql_lower,
                    f"SQL should reference table '{table}'",
                )

    def _validate_datamodel_types(self) -> None:
        """Validate data model types"""
        self._assert_expected_datamodel_types(
            self._list_service_entities(DashboardDataModel)
        )

    def _validate_tag_assignment(self) -> None:
        """Validate tag assignment to entities"""
        # Check Analytics Workbook dashboard
        analytics_dashboard = self.validate_entity_exists(
            Dashboard, self._ANALYTICS_DASHBOARD
        )
        dashboard_tag_labels = getattr(analytics_dashboard, "tags", None)
        if dashboard_tag_labels:
            dashboard_tags = {str(tag.name) for tag in dashboard_tag_labels}
            for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
                self.assertIn(
                    expected_tag,
                    dashboard_tags,
                    f"Dashboard should have tag '{expected_tag}'",
                )

        # Check charts
        # NOTE(review): charts are fetched with fields=None, so their tags are
        # presumably never returned and this loop asserts nothing - confirm
        # whether fields=["tags"] was intended.
        for chart_name in TableauExpectedValues.EXPECTED_CHART_NAMES:
            chart = self.get_entity_by_name(Chart, chart_name, fields=None)
            chart_tag_labels = getattr(chart, "tags", None) if chart else None
            if not chart_tag_labels:
                continue
            chart_tags = {str(tag.name) for tag in chart_tag_labels}
            for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
                self.assertIn(
                    expected_tag,
                    chart_tags,
                    f"Chart '{chart_name}' should have tag '{expected_tag}'",
                )

    def _validate_chart_types(self) -> None:
        """Validate chart types"""
        for chart in self._list_service_entities(Chart):
            chart_type = getattr(chart, "chartType", None)
            if not chart_type:
                continue
            self.assertEqual(
                str(chart_type),
                TableauExpectedValues.EXPECTED_CHART_TYPE,
                f"Chart '{chart.name.root}' should have type '{TableauExpectedValues.EXPECTED_CHART_TYPE}'",
            )

    def _validate_field_types(self) -> None:
        """Validate field types in data models"""
        for datamodel in self._list_service_entities(DashboardDataModel):
            for column in getattr(datamodel, "columns", None) or []:
                data_type = getattr(column, "dataType", None)
                if not data_type:
                    continue
                # Check if field type matches expected
                self.assertIn(
                    "DataType.RECORD",
                    str(data_type),
                    f"Field '{column.name.root}' should have tableau-related type",
                )

    # ================================
    # STATUS ASSERTION OVERRIDES
    # ================================
    def assert_not_including(self, source_status: Status, sink_status: Status):
        """
        Override base method for Tableau-specific behavior.
        """
        self.assertEqual(len(source_status.failures), 0)
        self.assertEqual(len(source_status.warnings), 0)
        self.assertGreaterEqual(len(source_status.filtered), 5)
        # We can have a diff of 1 element if we are counting the service, which is only marked as ingested in the
        # first go
        self.assertLessEqual(
            self.expected_dashboards_and_charts(),
            len(source_status.records) + len(source_status.updated_records),
        )
        self.assertEqual(len(sink_status.failures), 0)
        self.assertEqual(len(sink_status.warnings), 0)
        self.assertLessEqual(
            self.expected_dashboards_and_charts(),
            len(sink_status.records) + len(sink_status.updated_records),
        )

    def assert_for_vanilla_ingestion(
        self, source_status: Status, sink_status: Status
    ) -> None:
        """Assert the source and sink statuses of an unfiltered ingestion run.

        Both statuses must be free of failures and warnings, the source must
        report at least 5 filtered entries, and the record counts must reach
        the sums of the expected counts checked below.
        """
        self.assertEqual(len(source_status.failures), 0)
        self.assertEqual(len(source_status.warnings), 0)
        self.assertGreaterEqual(len(source_status.filtered), 5)
        self.assertGreaterEqual(
            len(source_status.records) + len(source_status.updated_records),
            self.expected_dashboards_and_charts_after_patch()
            + self.expected_tags()
            + self.expected_lineage()
            + self.expected_datamodel_lineage(),
        )
        self.assertEqual(len(sink_status.failures), 0)
        self.assertEqual(len(sink_status.warnings), 0)
        self.assertGreaterEqual(
            len(sink_status.records) + len(sink_status.updated_records),
            self.expected_dashboards_and_charts_after_patch()
            + self.expected_tags()
            + self.expected_datamodels(),
        )

    # ================================
    # PUBLIC LOOKUP HELPERS
    # ================================
    def get_entity_by_name(
        self,
        entity_type,
        name: str,
        service: str = TableauExpectedValues.SERVICE_NAME,
        fields: Optional[Sequence[str]] = ("tags", "charts"),
    ):
        """Helper to get entity by name or displayName.

        Args:
            entity_type: entity class handed to ``list_entities``.
            name: value compared against each entity's name and displayName.
            service: service name used as the listing filter.
            fields: extra fields to request; ``None`` is passed through as-is.
                The default is an immutable tuple and is copied per call so
                no list is shared between calls.

        Returns:
            The first matching entity, or ``None`` when nothing matches.
        """
        requested_fields = None if fields is None else list(fields)
        entities = self.openmetadata.list_entities(
            entity=entity_type, params={"service": service}, fields=requested_fields
        ).entities
        for entity in entities:
            # Check both name and displayName for matches
            candidates = (
                self._as_text(entity.name),
                self._as_text(getattr(entity, "displayName", None)),
            )
            if name in candidates:
                return entity
        return None

    def validate_entity_exists(
        self, entity_type, name: str, service: str = TableauExpectedValues.SERVICE_NAME
    ):
        """Helper to validate entity exists; returns the entity or fails the test"""
        entity = self.get_entity_by_name(entity_type, name, service)
        self.assertIsNotNone(
            entity, f"{entity_type.__name__} '{name}' not found in service '{service}'"
        )
        return entity