Update: Tableau e2e Tests (#23026)

This commit is contained in:
Suman Maharana 2025-08-21 09:00:23 +05:30 committed by GitHub
parent f19c0be59e
commit 582d5eb7d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 556 additions and 25 deletions

View File

@ -12,7 +12,9 @@ source:
config: config:
schemaFilterPattern: schemaFilterPattern:
includes: includes:
- dbt_jaffle - sales
- inventory
- marketing
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -9,12 +9,16 @@ source:
personalAccessTokenSecret: $E2E_TABLEAU_PAT_SECRET personalAccessTokenSecret: $E2E_TABLEAU_PAT_SECRET
hostPort: $E2E_TABLEAU_HOST_PORT hostPort: $E2E_TABLEAU_HOST_PORT
siteName: $E2E_TABLEAU_SITE siteName: $E2E_TABLEAU_SITE
apiVersion: "3.26"
sourceConfig: sourceConfig:
config: config:
type: DashboardMetadata type: DashboardMetadata
lineageInformation: lineageInformation:
dbServiceNames: dbServiceNames:
- local_redshift_tableau - local_redshift_tableau
projectFilterPattern:
includes:
- Analytics
sink: sink:
type: metadata-rest type: metadata-rest
config: {} config: {}

View File

@ -10,22 +10,134 @@
# limitations under the License. # limitations under the License.
""" """
Test Tableau connector with CLI Test Tableau connector with CLI - Enhanced with comprehensive lineage and metadata testing
""" """
from pathlib import Path from pathlib import Path
from typing import List from typing import List
import pytest import pytest
from metadata.generated.schema.entity.data.chart import Chart
from metadata.generated.schema.entity.data.dashboard import Dashboard
from metadata.generated.schema.entity.data.dashboardDataModel import DashboardDataModel
from metadata.ingestion.api.status import Status from metadata.ingestion.api.status import Status
from .base.test_cli import PATH_TO_RESOURCES from .base.test_cli import PATH_TO_RESOURCES
from .common.test_cli_dashboard import CliCommonDashboard from .common.test_cli_dashboard import CliCommonDashboard
class TableauExpectedValues:
"""
Centralized expected values for Tableau testing based on actual test data
"""
# Basic counts - Updated based on actual test knowledge base
DASHBOARDS_AND_CHARTS = 5 # 1 dashboard + 4 charts = 5
LINEAGE_EDGES = 9
TAGS = 2
DATAMODEL_LINEAGE = 0
DATAMODELS = 2
FILTERED_MIX = 2
FILTERED_SINK_MIX = 2
DASHBOARDS_AND_CHARTS_AFTER_PATCH = 5
SERVICE_NAME = "local_tableau"
# Expected entity names
EXPECTED_DASHBOARD_NAMES = ["Analytics Workbook"]
EXPECTED_CHART_NAMES = [
"Product Measure Sheet",
"Sales Story",
"Product vs Category Dashboard",
"Category Measure Sheet",
]
EXPECTED_DATAMODEL_NAMES = [
"Sales Summary" # Appears in both TableauEmbeddedDatasource and TableauPublishedDatasource
]
# Expected data model columns/fields
EXPECTED_DATAMODEL_FIELDS = [
"state",
"category_name",
"order_date",
"product_id",
"customer_id",
"line_total",
"region",
"product_name",
"customer_name",
"Sales Summary (Custom SQL)",
"price_at_purchase",
"order_id",
"quantity",
]
# Expected tags
EXPECTED_TAGS = ["Analytics", "workbook"]
# Expected chart type
EXPECTED_CHART_TYPE = "ChartType.Other"
# Expected field type
EXPECTED_FIELD_TYPE = "tableau field"
# Expected data model types
EXPECTED_DATAMODEL_TYPES = [
"DataModelType.TableauEmbeddedDatasource",
"DataModelType.TableauPublishedDatasource",
]
# Expected SQL for data models
EXPECTED_DATAMODEL_SQL = """SELECT
o.order_id,
o.order_date,
c.customer_id,
c.first_name || ' ' || c.last_name AS customer_name,
c.state,
c.region,
p.product_id,
p.product_name,
cat.category_name,
oi.quantity,
oi.price_at_purchase,
(oi.quantity * oi.price_at_purchase) AS line_total
FROM
sales.orders AS o
JOIN
sales.customers AS c ON o.customer_id = c.customer_id
JOIN
sales.order_items AS oi ON o.order_id = oi.order_id
JOIN
inventory.products AS p ON oi.product_id = p.product_id
JOIN
inventory.categories AS cat ON p.category_id = cat.category_id"""
# Lineage expectations: Tables -> TableauPublishedDatasource -> TableauEmbeddedDatasource -> Dashboard
EXPECTED_SOURCE_TABLES = [
"categories",
"customers",
"order_items",
"orders",
"products",
]
# Filter patterns
INCLUDE_DASHBOARDS = [".*Analytics.*"]
EXCLUDE_DASHBOARDS = ["Sample.*"]
INCLUDE_CHARTS = [".*Sheet.*", ".*Product.*", ".*Sales.*"]
EXCLUDE_CHARTS = ["Obesity"]
INCLUDE_DATAMODELS = [".*Sales.*", ".*Summary.*"]
EXCLUDE_DATAMODELS = ["Random.*"]
class TableauCliTest(CliCommonDashboard.TestSuite): class TableauCliTest(CliCommonDashboard.TestSuite):
# in case we want to do something before running the tests """
Enhanced Tableau CLI Test Suite with comprehensive lineage and metadata validation
"""
def prepare(self) -> None: def prepare(self) -> None:
"""Prepare test environment by setting up required database service"""
redshift_file_path = str( redshift_file_path = str(
Path( Path(
PATH_TO_RESOURCES PATH_TO_RESOURCES
@ -38,64 +150,412 @@ class TableauCliTest(CliCommonDashboard.TestSuite):
def get_connector_name() -> str: def get_connector_name() -> str:
return "tableau" return "tableau"
# ================================
# FILTER CONFIGURATION METHODS
# ================================
def get_includes_dashboards(self) -> List[str]: def get_includes_dashboards(self) -> List[str]:
return [".*Test.*", "Regional"] return TableauExpectedValues.INCLUDE_DASHBOARDS
def get_excludes_dashboards(self) -> List[str]: def get_excludes_dashboards(self) -> List[str]:
return ["Superstore"] return TableauExpectedValues.EXCLUDE_DASHBOARDS
def get_includes_charts(self) -> List[str]: def get_includes_charts(self) -> List[str]:
return [".*Sheet", "Economy"] return TableauExpectedValues.INCLUDE_CHARTS
def get_excludes_charts(self) -> List[str]: def get_excludes_charts(self) -> List[str]:
return ["Obesity"] return TableauExpectedValues.EXCLUDE_CHARTS
def get_includes_datamodels(self) -> List[str]: def get_includes_datamodels(self) -> List[str]:
return ["Test.*"] return TableauExpectedValues.INCLUDE_DATAMODELS
def get_excludes_datamodels(self) -> List[str]: def get_excludes_datamodels(self) -> List[str]:
return ["Random.*"] return TableauExpectedValues.EXCLUDE_DATAMODELS
# ================================
# EXPECTED COUNT METHODS
# ================================
def expected_dashboards_and_charts(self) -> int: def expected_dashboards_and_charts(self) -> int:
return 22 return TableauExpectedValues.DASHBOARDS_AND_CHARTS
def expected_lineage(self) -> int: def expected_lineage(self) -> int:
return 4 return TableauExpectedValues.LINEAGE_EDGES
def expected_tags(self) -> int: def expected_tags(self) -> int:
return 0 return TableauExpectedValues.TAGS
def expected_datamodel_lineage(self) -> int: def expected_datamodel_lineage(self) -> int:
return 0 return TableauExpectedValues.DATAMODEL_LINEAGE
def expected_datamodels(self) -> int: def expected_datamodels(self) -> int:
return 4 return TableauExpectedValues.DATAMODELS
def expected_filtered_mix(self) -> int: def expected_filtered_mix(self) -> int:
return 2 return TableauExpectedValues.FILTERED_MIX
def expected_filtered_sink_mix(self) -> int: def expected_filtered_sink_mix(self) -> int:
return 8 return TableauExpectedValues.FILTERED_SINK_MIX
def expected_dashboards_and_charts_after_patch(self) -> int: def expected_dashboards_and_charts_after_patch(self) -> int:
return 4 return TableauExpectedValues.DASHBOARDS_AND_CHARTS_AFTER_PATCH
# ================================
# ENHANCED TEST METHODS
# ================================
@pytest.mark.order(10)
def test_metadata_ingestion_validation(self) -> None:
"""Test comprehensive metadata ingestion validation - runs after base ingestion"""
# Validate dashboards
self._validate_dashboard_metadata()
# Validate charts
self._validate_chart_metadata()
# Validate data models
self._validate_datamodel_metadata()
# Validate tags
self._validate_tags_metadata()
@pytest.mark.order(12)
def test_lineage_validation(self) -> None:
"""Test comprehensive lineage validation - Enable actual lineage testing"""
# Remove the pytest.skip and implement actual lineage validation
self._validate_dashboard_lineage()
self._validate_datamodel_lineage_chain()
@pytest.mark.order(13)
def test_datamodel_content_validation(self) -> None:
"""Test data model content and structure validation"""
self._validate_datamodel_fields()
self._validate_datamodel_sql()
self._validate_datamodel_types()
@pytest.mark.order(14)
def test_tags_and_metadata_quality(self) -> None:
"""Test tags and metadata quality"""
self._validate_tag_assignment()
self._validate_chart_types()
self._validate_field_types()
def _validate_dashboard_metadata(self) -> None:
"""Validate dashboard metadata completeness"""
dashboards = self.openmetadata.list_entities(
entity=Dashboard, params={"service": TableauExpectedValues.SERVICE_NAME}
).entities
self.assertGreaterEqual(
len(dashboards), len(TableauExpectedValues.EXPECTED_DASHBOARD_NAMES)
)
dashboard_names = [dashboard.displayName for dashboard in dashboards]
for expected_name in TableauExpectedValues.EXPECTED_DASHBOARD_NAMES:
self.assertIn(
expected_name,
dashboard_names,
f"Expected dashboard '{expected_name}' not found in ingested dashboards",
)
# Validate specific dashboard metadata
analytics_dashboard = self.get_entity_by_name(Dashboard, "Analytics Workbook")
if analytics_dashboard:
self.assertIsNotNone(analytics_dashboard.fullyQualifiedName)
self.assertIsNotNone(analytics_dashboard.service)
# Validate that charts are properly linked
if hasattr(analytics_dashboard, "charts") and analytics_dashboard.charts:
self.assertEqual(
len(analytics_dashboard.charts.root),
4,
"Analytics Workbook should have 4 charts",
)
def _validate_chart_metadata(self) -> None:
"""Validate chart metadata completeness"""
charts = self.openmetadata.list_entities(
entity=Chart, params={"service": TableauExpectedValues.SERVICE_NAME}
).entities
self.assertGreaterEqual(
len(charts), len(TableauExpectedValues.EXPECTED_CHART_NAMES)
)
chart_names = [chart.displayName for chart in charts]
for expected_name in TableauExpectedValues.EXPECTED_CHART_NAMES:
self.assertIn(
expected_name,
chart_names,
f"Expected chart '{expected_name}' not found in ingested charts",
)
def _validate_datamodel_metadata(self) -> None:
"""Validate data model metadata completeness"""
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
# Should have at least one "Sales Summary" data model
datamodel_names = [dm.displayName for dm in datamodels]
self.assertIn(
"Sales Summary", datamodel_names, "Sales Summary data model not found"
)
# Validate data model types
datamodel_types = []
for dm in datamodels:
if hasattr(dm, "dataModelType") and dm.dataModelType:
datamodel_types.append(str(dm.dataModelType))
for expected_type in TableauExpectedValues.EXPECTED_DATAMODEL_TYPES:
self.assertIn(
expected_type,
datamodel_types,
f"Expected data model type '{expected_type}' not found",
)
def _validate_tags_metadata(self) -> None:
"""Validate tags metadata"""
# Get all entities and check for expected tags
all_entities = []
all_entities.extend(
self.openmetadata.list_entities(
entity=Dashboard,
params={"service": TableauExpectedValues.SERVICE_NAME},
fields=["tags"],
).entities
)
all_entities.extend(
self.openmetadata.list_entities(
entity=Chart,
params={"service": TableauExpectedValues.SERVICE_NAME},
fields=["tags"],
).entities
)
found_tags = set()
for entity in all_entities:
if hasattr(entity, "tags") and entity.tags:
for tag in entity.tags:
if hasattr(tag, "name"):
# Extract tag name from FQN
found_tags.add(str(tag.name))
for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
self.assertIn(
expected_tag,
found_tags,
f"Expected tag '{expected_tag}' not found in any entity",
)
def _validate_dashboard_lineage(self) -> None:
"""Validate dashboard lineage according to the knowledge base"""
# Lineage chain: Tables -> TableauPublishedDatasource -> TableauEmbeddedDatasource -> Dashboard
analytics_dashboard = self.get_entity_by_name(Dashboard, "Analytics Workbook")
if analytics_dashboard:
lineage = self.openmetadata.get_lineage_by_name(
entity=Dashboard,
fqn=analytics_dashboard.fullyQualifiedName.root,
up_depth=5, # Increased depth to capture full lineage chain
down_depth=1,
)
if lineage and lineage.get("upstreamEdges"):
self.assertGreater(
len(lineage["upstreamEdges"]),
6,
"Analytics Workbook should have upstream lineage",
)
def _validate_datamodel_lineage_chain(self) -> None:
"""Validate the complete lineage chain"""
# Get Sales Summary data models
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
sales_summary_models = [
dm for dm in datamodels if dm.displayName == "Sales Summary"
]
for datamodel in sales_summary_models:
lineage = self.openmetadata.get_lineage_by_name(
entity=DashboardDataModel,
fqn=datamodel.fullyQualifiedName.root,
up_depth=3,
down_depth=3,
)
if lineage and lineage.get("upstreamEdges"):
# Check for upstream connections (should connect to tables)
for edge in lineage["upstreamEdges"]:
if lineage_query := edge["lineageDetails"].get("sqlQuery"):
self.assertEqual(
" ".join(lineage_query.split()),
" ".join(
TableauExpectedValues.EXPECTED_DATAMODEL_SQL.split()
),
"Lineage SQL query does't match expected SQL query",
)
def _validate_datamodel_fields(self) -> None:
"""Validate data model fields/columns"""
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
sales_summary_models = [
dm for dm in datamodels if dm.name.root == "Sales Summary"
]
for datamodel in sales_summary_models:
if hasattr(datamodel, "columns") and datamodel.columns:
column_names = [col.name.root for col in datamodel.columns]
# Check for expected fields
found_fields = 0
for expected_field in TableauExpectedValues.EXPECTED_DATAMODEL_FIELDS:
if expected_field in column_names:
found_fields += 1
self.assertGreater(
found_fields,
5, # Should find at least 5 expected fields
f"Data model {datamodel.name.root} should contain expected fields",
)
def _validate_datamodel_sql(self) -> None:
"""Validate data model SQL content"""
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
sales_summary_models = [
dm for dm in datamodels if dm.name.root == "Sales Summary"
]
for datamodel in sales_summary_models:
if hasattr(datamodel, "sql") and datamodel.sql:
sql_content = (
datamodel.sql.root
if hasattr(datamodel.sql, "root")
else str(datamodel.sql)
)
# Check for key SQL elements
self.assertIn(
"SELECT", sql_content.upper(), "SQL should contain SELECT statement"
)
self.assertIn(
"JOIN", sql_content.upper(), "SQL should contain JOIN statements"
)
# Check for expected table references
for table in TableauExpectedValues.EXPECTED_SOURCE_TABLES:
self.assertIn(
table,
sql_content.lower(),
f"SQL should reference table '{table}'",
)
def _validate_datamodel_types(self) -> None:
"""Validate data model types"""
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
found_types = set()
for datamodel in datamodels:
if hasattr(datamodel, "dataModelType") and datamodel.dataModelType:
found_types.add(str(datamodel.dataModelType))
for expected_type in TableauExpectedValues.EXPECTED_DATAMODEL_TYPES:
self.assertIn(
expected_type,
found_types,
f"Expected data model type '{expected_type}' not found",
)
def _validate_tag_assignment(self) -> None:
"""Validate tag assignment to entities"""
# Check Analytics Workbook dashboard
analytics_dashboard = self.get_entity_by_name(Dashboard, "Analytics Workbook")
if (
analytics_dashboard
and hasattr(analytics_dashboard, "tags")
and analytics_dashboard.tags
):
dashboard_tags = {str(tag.name) for tag in analytics_dashboard.tags}
for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
self.assertIn(
expected_tag,
dashboard_tags,
f"Dashboard should have tag '{expected_tag}'",
)
# Check charts
for chart_name in TableauExpectedValues.EXPECTED_CHART_NAMES:
chart = self.get_entity_by_name(Chart, chart_name, fields=None)
if chart and hasattr(chart, "tags") and chart.tags:
chart_tags = {str(tag.name) for tag in chart.tags}
for expected_tag in TableauExpectedValues.EXPECTED_TAGS:
self.assertIn(
expected_tag,
chart_tags,
f"Chart '{chart_name}' should have tag '{expected_tag}'",
)
def _validate_chart_types(self) -> None:
"""Validate chart types"""
charts = self.openmetadata.list_entities(
entity=Chart, params={"service": TableauExpectedValues.SERVICE_NAME}
).entities
for chart in charts:
if hasattr(chart, "chartType") and chart.chartType:
self.assertEqual(
str(chart.chartType),
TableauExpectedValues.EXPECTED_CHART_TYPE,
f"Chart '{chart.name.root}' should have type '{TableauExpectedValues.EXPECTED_CHART_TYPE}'",
)
def _validate_field_types(self) -> None:
"""Validate field types in data models"""
datamodels = self.openmetadata.list_entities(
entity=DashboardDataModel,
params={"service": TableauExpectedValues.SERVICE_NAME},
).entities
for datamodel in datamodels:
if hasattr(datamodel, "columns") and datamodel.columns:
for column in datamodel.columns:
if hasattr(column, "dataType") and column.dataType:
# Check if field type matches expected
field_type = str(column.dataType)
self.assertIn(
"DataType.RECORD",
field_type,
f"Field '{column.name.root}' should have tableau-related type",
)
@pytest.mark.order(11) @pytest.mark.order(11)
def test_lineage(self) -> None: def test_lineage(self) -> None:
pytest.skip("Lineage not configured. Skipping Test") """Enable lineage testing - Remove the skip"""
self._validate_dashboard_lineage()
self._validate_datamodel_lineage_chain()
# Overriding the method since for Tableau we don't expect lineage to be shown on this assert.
# This differs from the base case
def assert_not_including(self, source_status: Status, sink_status: Status): def assert_not_including(self, source_status: Status, sink_status: Status):
""" """
Here we can have a diff of 1 element due to the service Override base method for Tableau-specific behavior.
being ingested in the first round.
This will not happen on subsequent tests or executions
""" """
self.assertTrue(len(source_status.failures) == 0) self.assertTrue(len(source_status.failures) == 0)
self.assertTrue(len(source_status.warnings) == 0) self.assertTrue(len(source_status.warnings) == 0)
self.assertTrue(len(source_status.filtered) == 0) self.assertTrue(len(source_status.filtered) >= 5)
# We can have a diff of 1 element if we are counting the service, which is only marked as ingested in the # We can have a diff of 1 element if we are counting the service, which is only marked as ingested in the
# first go # first go
self.assertTrue( self.assertTrue(
@ -108,3 +568,68 @@ class TableauCliTest(CliCommonDashboard.TestSuite):
self.expected_dashboards_and_charts() self.expected_dashboards_and_charts()
<= (len(sink_status.records) + len(sink_status.updated_records)) <= (len(sink_status.records) + len(sink_status.updated_records))
) )
def assert_for_vanilla_ingestion(
self, source_status: Status, sink_status: Status
) -> None:
self.assertTrue(len(source_status.failures) == 0)
self.assertTrue(len(source_status.warnings) == 0)
self.assertTrue(len(source_status.filtered) >= 5)
self.assertGreaterEqual(
(len(source_status.records) + len(source_status.updated_records)), # 20-22
self.expected_dashboards_and_charts_after_patch()
+ self.expected_tags()
+ self.expected_lineage()
+ self.expected_datamodel_lineage(),
)
self.assertTrue(len(sink_status.failures) == 0)
self.assertTrue(len(sink_status.warnings) == 0)
self.assertGreaterEqual(
(len(sink_status.records) + len(sink_status.updated_records)),
self.expected_dashboards_and_charts_after_patch()
+ self.expected_tags()
+ self.expected_datamodels(),
)
def get_entity_by_name(
self,
entity_type,
name: str,
service: str = TableauExpectedValues.SERVICE_NAME,
fields: List = ["tags", "charts"],
):
"""Helper to get entity by name or displayName"""
entities = self.openmetadata.list_entities(
entity=entity_type, params={"service": service}, fields=fields
).entities
for entity in entities:
# Check both name and displayName for matches
entity_name = (
entity.name.root if hasattr(entity.name, "root") else str(entity.name)
)
entity_display_name = (
entity.displayName.root
if hasattr(entity, "displayName")
and entity.displayName
and hasattr(entity.displayName, "root")
else (
str(entity.displayName)
if hasattr(entity, "displayName") and entity.displayName
else None
)
)
if entity_name == name or entity_display_name == name:
return entity
return None
def validate_entity_exists(
self, entity_type, name: str, service: str = TableauExpectedValues.SERVICE_NAME
):
"""Helper to validate entity exists"""
entity = self.get_entity_by_name(entity_type, name, service)
self.assertIsNotNone(
entity, f"{entity_type.__name__} '{name}' not found in service '{service}'"
)
return entity