datahub/metadata-ingestion/tests/unit/fivetran/test_fivetran_models_validation.py

259 lines
9.2 KiB
Python
Raw Normal View History

2025-09-12 23:28:55 +01:00
"""
Unit tests for Fivetran models and configuration validation.
Tests focus on business logic, edge cases, and data validation that affects connector behavior.
"""
from datahub.ingestion.source.fivetran.config import (
PlatformDetail,
)
from datahub.ingestion.source.fivetran.models import (
ColumnLineage,
Connector,
Job,
TableLineage,
)
class TestPlatformDetailValidation:
"""Test platform detail configuration validation and edge cases."""
def test_platform_detail_with_database_name_validation(self) -> None:
"""Test that database names are handled correctly for URN generation."""
# Test with various database name formats that could affect URN generation
test_cases = [
("my-database", True), # Hyphenated name
("my_database", True), # Underscore name
("123database", True), # Starting with number
("database.with.dots", True), # Dots in name
("", True), # Empty database name should be allowed
]
for db_name, should_be_valid in test_cases:
if should_be_valid:
detail = PlatformDetail(
platform="postgres", database=db_name, env="PROD"
)
assert detail.database == db_name
def test_platform_detail_environment_validation(self) -> None:
"""Test environment validation affects URN generation."""
valid_envs = ["PROD", "DEV", "STAGING", "TEST", "QA"]
for env in valid_envs:
detail = PlatformDetail(platform="postgres", env=env)
assert detail.env == env
def test_platform_detail_schema_inclusion_logic(self) -> None:
"""Test schema inclusion logic that affects URN structure."""
# When include_schema_in_urn is True, schema should be part of URN
detail_with_schema = PlatformDetail(
platform="postgres", include_schema_in_urn=True
)
assert detail_with_schema.include_schema_in_urn is True
# When False, schema should not be part of URN
detail_without_schema = PlatformDetail(
platform="postgres", include_schema_in_urn=False
)
assert detail_without_schema.include_schema_in_urn is False
class TestConnectorModelValidation:
"""Test connector model validation and business logic."""
def test_connector_with_invalid_sync_frequency(self) -> None:
"""Test connector validation with edge case sync frequencies."""
# Very low sync frequency (should be allowed)
connector_low = Connector(
connector_id="test",
connector_name="Test",
connector_type="postgres",
paused=False,
sync_frequency=1, # 1 minute
destination_id="dest",
)
assert connector_low.sync_frequency == 1
# Very high sync frequency (should be allowed)
connector_high = Connector(
connector_id="test",
connector_name="Test",
connector_type="postgres",
paused=False,
sync_frequency=525600, # 1 year in minutes
destination_id="dest",
)
assert connector_high.sync_frequency == 525600
def test_connector_with_complex_lineage_relationships(self) -> None:
"""Test connector with complex lineage that could affect performance."""
# Create many column lineages to test performance limits
column_lineages = [
ColumnLineage(
source_column=f"source_col_{i}", destination_column=f"dest_col_{i}"
)
for i in range(100) # Test with many columns
]
table_lineage = TableLineage(
source_table="large_source_table",
destination_table="large_dest_table",
column_lineage=column_lineages,
)
connector = Connector(
connector_id="large_connector",
connector_name="Large Connector",
connector_type="postgres",
paused=False,
sync_frequency=1440,
destination_id="dest",
lineage=[table_lineage],
)
assert len(connector.lineage) == 1
assert len(connector.lineage[0].column_lineage) == 100
def test_connector_with_many_jobs(self) -> None:
"""Test connector with many jobs to validate performance."""
# Create many jobs to test limits
jobs = [
Job(
job_id=f"job_{i}",
start_time=1234567890 + i,
end_time=1234567890 + i + 60,
status="SUCCESS" if i % 2 == 0 else "FAILED",
)
for i in range(50) # Test with many jobs
]
connector = Connector(
connector_id="job_heavy_connector",
connector_name="Job Heavy Connector",
connector_type="postgres",
paused=False,
sync_frequency=1440,
destination_id="dest",
jobs=jobs,
)
assert len(connector.jobs) == 50
# Test job status distribution
success_jobs = [j for j in connector.jobs if j.status == "SUCCESS"]
failed_jobs = [j for j in connector.jobs if j.status == "FAILED"]
assert len(success_jobs) == 25
assert len(failed_jobs) == 25
def test_connector_user_id_none_handling(self) -> None:
"""Test that None user_id is handled correctly (important for API calls)."""
connector = Connector(
connector_id="test",
connector_name="Test",
connector_type="postgres",
paused=False,
sync_frequency=1440,
destination_id="dest",
user_id=None,
)
# This should not raise an error and user_id should be None
assert connector.user_id is None
class TestLineageModelValidation:
"""Test lineage model validation and edge cases."""
def test_column_lineage_with_special_characters(self) -> None:
"""Test column lineage with special characters that could break URNs."""
special_cases = [
("source.col", "dest.col"), # Dots
("source-col", "dest-col"), # Hyphens
("source_col", "dest_col"), # Underscores
("Source Col", "Dest Col"), # Spaces
("source123", "dest456"), # Numbers
]
for source, dest in special_cases:
col_lineage = ColumnLineage(source_column=source, destination_column=dest)
assert col_lineage.source_column == source
assert col_lineage.destination_column == dest
def test_table_lineage_with_schema_qualified_names(self) -> None:
"""Test table lineage with schema-qualified table names."""
table_lineage = TableLineage(
source_table="source_schema.source_table",
destination_table="dest_schema.dest_table",
column_lineage=[],
)
assert "source_schema" in table_lineage.source_table
assert "dest_schema" in table_lineage.destination_table
def test_empty_lineage_collections(self) -> None:
"""Test that empty lineage collections are handled correctly."""
# Empty column lineage should be fine
table_lineage = TableLineage(
source_table="source", destination_table="dest", column_lineage=[]
)
assert table_lineage.column_lineage == []
# Connector with empty lineage should be fine
connector = Connector(
connector_id="test",
connector_name="Test",
connector_type="postgres",
paused=False,
sync_frequency=1440,
destination_id="dest",
lineage=[],
)
assert connector.lineage == []
class TestJobModelValidation:
"""Test job model validation and business logic."""
def test_job_with_different_status_values(self) -> None:
"""Test job model with various status values that affect processing."""
status_values = ["SUCCESS", "FAILED", "RUNNING", "PAUSED", "CANCELLED"]
for status in status_values:
job = Job(
job_id=f"job_{status.lower()}",
start_time=1234567890,
end_time=1234567900,
status=status,
)
assert job.status == status
def test_job_timing_validation(self) -> None:
"""Test job timing logic that affects sync history processing."""
# Normal case: end_time > start_time
job_normal = Job(
job_id="normal_job",
start_time=1234567890,
end_time=1234567900, # 10 seconds later
status="SUCCESS",
)
assert job_normal.end_time > job_normal.start_time
# Edge case: same start and end time (instant job)
job_instant = Job(
job_id="instant_job",
start_time=1234567890,
end_time=1234567890, # Same time
status="SUCCESS",
)
assert job_instant.end_time == job_instant.start_time
# Edge case: very long running job
job_long = Job(
job_id="long_job",
start_time=1234567890,
end_time=1234567890 + 86400, # 24 hours later
status="SUCCESS",
)
duration = job_long.end_time - job_long.start_time
assert duration == 86400 # 24 hours in seconds