mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-04 12:36:23 +00:00 
			
		
		
		
	Kafka connect improvements (#23845)
* Kafka Connect Lineage Improvements * Remove specific Kafka topic example from docstring Removed example from the documentation regarding the earnin.bank.dev topic. * fix: update comment to reflect accurate example for database server name handling * fix: improve expected FQN display in warning messages for missing Kafka topics * fix: update table entity retrieval method in KafkaconnectSource * fix: enhance lineage information checks and improve logging for missing configurations in KafkaconnectSource * Kafka Connect Lineage Improvements * address comments; work without the table.include.list --------- Co-authored-by: Ayush Shah <ayush@getcollate.io>
This commit is contained in:
		
							parent
							
								
									5c638f5c8e
								
							
						
					
					
						commit
						ce3a9bd654
					
				@ -43,7 +43,6 @@ def parse_cdc_topic_name(topic_name: str, database_server_name: str = None) -> d
 | 
				
			|||||||
    - Examples:
 | 
					    - Examples:
 | 
				
			||||||
      - MysqlKafkaV2.ecommerce.orders -> database=ecommerce, table=orders
 | 
					      - MysqlKafkaV2.ecommerce.orders -> database=ecommerce, table=orders
 | 
				
			||||||
      - PostgresKafkaCDC.public.orders -> database=public, table=orders
 | 
					      - PostgresKafkaCDC.public.orders -> database=public, table=orders
 | 
				
			||||||
      - ecommerce.customers -> database=ecommerce, table=customers (if server-name matches)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Args:
 | 
					    Args:
 | 
				
			||||||
        topic_name: The Kafka topic name
 | 
					        topic_name: The Kafka topic name
 | 
				
			||||||
@ -59,30 +58,47 @@ def parse_cdc_topic_name(topic_name: str, database_server_name: str = None) -> d
 | 
				
			|||||||
    if topic_name.startswith(("_", "dbhistory.", "__")):
 | 
					    if topic_name.startswith(("_", "dbhistory.", "__")):
 | 
				
			||||||
        return {}
 | 
					        return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # If database_server_name is provided, check if topic starts with it
 | 
				
			||||||
 | 
					    # This handles server names with dots like "collate.ecommerce.dev"
 | 
				
			||||||
 | 
					    if database_server_name:
 | 
				
			||||||
 | 
					        # Check if topic starts with the server name prefix
 | 
				
			||||||
 | 
					        server_prefix = database_server_name + "."
 | 
				
			||||||
 | 
					        if topic_name.startswith(server_prefix):
 | 
				
			||||||
 | 
					            # Strip the server name prefix to get schema.table or just table
 | 
				
			||||||
 | 
					            remaining = topic_name[len(server_prefix) :]
 | 
				
			||||||
 | 
					            remaining_parts = remaining.split(".")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if len(remaining_parts) == 2:
 | 
				
			||||||
 | 
					                # Pattern: {server-name}.{schema}.{table}
 | 
				
			||||||
 | 
					                database, table = remaining_parts
 | 
				
			||||||
 | 
					                return {"database": database, "table": table}
 | 
				
			||||||
 | 
					            elif len(remaining_parts) == 1:
 | 
				
			||||||
 | 
					                # Pattern: {server-name}.{table} (no explicit schema)
 | 
				
			||||||
 | 
					                return {"database": database_server_name, "table": remaining_parts[0]}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Check if topic exactly matches server name (edge case)
 | 
				
			||||||
 | 
					        if topic_name.lower() == database_server_name.lower():
 | 
				
			||||||
 | 
					            return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Fallback: try to parse without server name
 | 
				
			||||||
    parts = topic_name.split(".")
 | 
					    parts = topic_name.split(".")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Pattern: {prefix}.{database}.{table} (3 parts)
 | 
					    # Pattern: {prefix}.{database}.{table} (3 parts)
 | 
				
			||||||
    if len(parts) == 3:
 | 
					    if len(parts) == 3:
 | 
				
			||||||
        prefix, database, table = parts
 | 
					        prefix, database, table = parts
 | 
				
			||||||
        # Verify prefix matches server name if provided
 | 
					 | 
				
			||||||
        if database_server_name and prefix.lower() != database_server_name.lower():
 | 
					 | 
				
			||||||
            # Might be schema.database.table for some connectors
 | 
					 | 
				
			||||||
            pass
 | 
					 | 
				
			||||||
        return {"database": database, "table": table}
 | 
					        return {"database": database, "table": table}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Pattern: {database}.{table} (2 parts)
 | 
					    # Pattern: {database}.{table} (2 parts)
 | 
				
			||||||
    elif len(parts) == 2:
 | 
					    elif len(parts) == 2:
 | 
				
			||||||
        database, table = parts
 | 
					        database, table = parts
 | 
				
			||||||
        # Only accept if server name matches or not provided
 | 
					 | 
				
			||||||
        if database_server_name and database.lower() == database_server_name.lower():
 | 
					 | 
				
			||||||
            # This is server_name.table, so database is the server name
 | 
					 | 
				
			||||||
            return {"database": database, "table": table}
 | 
					 | 
				
			||||||
        # Or accept as database.table
 | 
					 | 
				
			||||||
        return {"database": database, "table": table}
 | 
					        return {"database": database, "table": table}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Pattern: just {table} (1 part) - use server name as database
 | 
					    # Pattern: just {table} (1 part)
 | 
				
			||||||
    elif len(parts) == 1 and database_server_name:
 | 
					    elif len(parts) == 1:
 | 
				
			||||||
 | 
					        if database_server_name:
 | 
				
			||||||
            return {"database": database_server_name, "table": topic_name}
 | 
					            return {"database": database_server_name, "table": topic_name}
 | 
				
			||||||
 | 
					        # Without server name, we can't determine the database
 | 
				
			||||||
 | 
					        return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return {}
 | 
					    return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -139,6 +155,34 @@ SUPPORTED_DATASETS = {
 | 
				
			|||||||
    "container_name": ConnectorConfigKeys.CONTAINER_KEYS,
 | 
					    "container_name": ConnectorConfigKeys.CONTAINER_KEYS,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Map Kafka Connect connector class names to OpenMetadata service types
 | 
				
			||||||
 | 
					CONNECTOR_CLASS_TO_SERVICE_TYPE = {
 | 
				
			||||||
 | 
					    "MySqlCdcSource": "Mysql",
 | 
				
			||||||
 | 
					    "MySqlCdcSourceV2": "Mysql",
 | 
				
			||||||
 | 
					    "PostgresCdcSource": "Postgres",
 | 
				
			||||||
 | 
					    "PostgresSourceConnector": "Postgres",
 | 
				
			||||||
 | 
					    "SqlServerCdcSource": "Mssql",
 | 
				
			||||||
 | 
					    "MongoDbCdcSource": "MongoDB",
 | 
				
			||||||
 | 
					    "OracleCdcSource": "Oracle",
 | 
				
			||||||
 | 
					    "Db2CdcSource": "Db2",
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Map service types to hostname config keys
 | 
				
			||||||
 | 
					SERVICE_TYPE_HOSTNAME_KEYS = {
 | 
				
			||||||
 | 
					    "Mysql": ["database.hostname", "connection.host"],
 | 
				
			||||||
 | 
					    "Postgres": ["database.hostname", "connection.host"],
 | 
				
			||||||
 | 
					    "Mssql": ["database.hostname"],
 | 
				
			||||||
 | 
					    "MongoDB": ["mongodb.connection.uri", "connection.uri"],
 | 
				
			||||||
 | 
					    "Oracle": ["database.hostname"],
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Map service types to broker/endpoint config keys for messaging services
 | 
				
			||||||
 | 
					MESSAGING_ENDPOINT_KEYS = [
 | 
				
			||||||
 | 
					    "kafka.endpoint",
 | 
				
			||||||
 | 
					    "bootstrap.servers",
 | 
				
			||||||
 | 
					    "kafka.bootstrap.servers",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class KafkaConnectClient:
 | 
					class KafkaConnectClient:
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
				
			|||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -42,6 +42,9 @@ class KafkaConnectTasks(BaseModel):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class KafkaConnectTopics(BaseModel):
 | 
					class KafkaConnectTopics(BaseModel):
 | 
				
			||||||
    name: str = Field(..., description="Name of the topic (e.g., random-source-avro)")
 | 
					    name: str = Field(..., description="Name of the topic (e.g., random-source-avro)")
 | 
				
			||||||
 | 
					    fqn: Optional[str] = Field(
 | 
				
			||||||
 | 
					        default=None, description="Fully qualified name of the topic in OpenMetadata"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class KafkaConnectColumnMapping(BaseModel):
 | 
					class KafkaConnectColumnMapping(BaseModel):
 | 
				
			||||||
 | 
				
			|||||||
@ -952,6 +952,36 @@ class TestCDCTopicParsing(TestCase):
 | 
				
			|||||||
        result = parse_cdc_topic_name("MongoCDC.mydb.users", "MongoCDC")
 | 
					        result = parse_cdc_topic_name("MongoCDC.mydb.users", "MongoCDC")
 | 
				
			||||||
        self.assertEqual(result, {"database": "mydb", "table": "users"})
 | 
					        self.assertEqual(result, {"database": "mydb", "table": "users"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_parse_cdc_topic_server_name_with_dots(self):
 | 
				
			||||||
 | 
					        """Test parsing CDC topics when server name contains dots"""
 | 
				
			||||||
 | 
					        from metadata.ingestion.source.pipeline.kafkaconnect.client import (
 | 
				
			||||||
 | 
					            parse_cdc_topic_name,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Server name with dots: myapp.payments.prod
 | 
				
			||||||
 | 
					        # Full topic: myapp.payments.prod.transactions.orders
 | 
				
			||||||
 | 
					        # Expected: database=transactions, table=orders
 | 
				
			||||||
 | 
					        result = parse_cdc_topic_name(
 | 
				
			||||||
 | 
					            "myapp.payments.prod.transactions.orders", "myapp.payments.prod"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, {"database": "transactions", "table": "orders"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Server name with dots and only table (no schema)
 | 
				
			||||||
 | 
					        # Full topic: myapp.payments.prod.users
 | 
				
			||||||
 | 
					        # Expected: database=myapp.payments.prod, table=users
 | 
				
			||||||
 | 
					        result = parse_cdc_topic_name(
 | 
				
			||||||
 | 
					            "myapp.payments.prod.users", "myapp.payments.prod"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, {"database": "myapp.payments.prod", "table": "users"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Multiple level server name
 | 
				
			||||||
 | 
					        # Server: app.service.env.region
 | 
				
			||||||
 | 
					        # Topic: app.service.env.region.schema1.table1
 | 
				
			||||||
 | 
					        result = parse_cdc_topic_name(
 | 
				
			||||||
 | 
					            "app.service.env.region.schema1.table1", "app.service.env.region"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, {"database": "schema1", "table": "table1"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestKafkaConnectCDCColumnExtraction(TestCase):
 | 
					class TestKafkaConnectCDCColumnExtraction(TestCase):
 | 
				
			||||||
    """Test CDC column extraction from Debezium schema"""
 | 
					    """Test CDC column extraction from Debezium schema"""
 | 
				
			||||||
 | 
				
			|||||||
@ -0,0 +1,428 @@
 | 
				
			|||||||
 | 
					#  Copyright 2025 Collate
 | 
				
			||||||
 | 
					#  Licensed under the Collate Community License, Version 1.0 (the "License");
 | 
				
			||||||
 | 
					#  you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					#  You may obtain a copy of the License at
 | 
				
			||||||
 | 
					#  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
 | 
				
			||||||
 | 
					#  Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					#  distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					#  See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					#  limitations under the License.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					Test KafkaConnect service discovery and caching functionality
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					from unittest import TestCase
 | 
				
			||||||
 | 
					from unittest.mock import Mock, patch
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from metadata.generated.schema.entity.services.databaseService import DatabaseService
 | 
				
			||||||
 | 
					from metadata.generated.schema.entity.services.messagingService import MessagingService
 | 
				
			||||||
 | 
					from metadata.ingestion.source.pipeline.kafkaconnect.metadata import KafkaconnectSource
 | 
				
			||||||
 | 
					from metadata.ingestion.source.pipeline.kafkaconnect.models import (
 | 
				
			||||||
 | 
					    KafkaConnectPipelineDetails,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestServiceCaching(TestCase):
 | 
				
			||||||
 | 
					    """Test service caching functionality"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _create_mock_service(self, name, service_type, host_port=None):
 | 
				
			||||||
 | 
					        """Helper to create a mock database service"""
 | 
				
			||||||
 | 
					        service = Mock(spec=DatabaseService)
 | 
				
			||||||
 | 
					        service.name = Mock()
 | 
				
			||||||
 | 
					        service.name.root = name
 | 
				
			||||||
 | 
					        service.serviceType = Mock()
 | 
				
			||||||
 | 
					        service.serviceType.value = service_type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if host_port:
 | 
				
			||||||
 | 
					            service.connection = Mock()
 | 
				
			||||||
 | 
					            service.connection.config = Mock()
 | 
				
			||||||
 | 
					            service.connection.config.hostPort = host_port
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            service.connection = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return service
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _create_mock_messaging_service(self, name, bootstrap_servers=None):
 | 
				
			||||||
 | 
					        """Helper to create a mock messaging service"""
 | 
				
			||||||
 | 
					        service = Mock(spec=MessagingService)
 | 
				
			||||||
 | 
					        service.name = Mock()
 | 
				
			||||||
 | 
					        service.name.root = name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if bootstrap_servers:
 | 
				
			||||||
 | 
					            service.connection = Mock()
 | 
				
			||||||
 | 
					            service.connection.config = Mock()
 | 
				
			||||||
 | 
					            service.connection.config.bootstrapServers = bootstrap_servers
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            service.connection = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return service
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_database_services_property_caches_results(self, mock_parent_init):
 | 
				
			||||||
 | 
					        """Test that database_services property caches results"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.verifySSL = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        mock_db_services = [
 | 
				
			||||||
 | 
					            self._create_mock_service("mysql-prod", "Mysql", "localhost:3306"),
 | 
				
			||||||
 | 
					            self._create_mock_service("postgres-prod", "Postgres", "localhost:5432"),
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter(mock_db_services)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					        source.metadata = (
 | 
				
			||||||
 | 
					            mock_metadata  # Set metadata manually since parent __init__ is mocked
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # First access - should call list_all_entities
 | 
				
			||||||
 | 
					        services1 = source.database_services
 | 
				
			||||||
 | 
					        self.assertEqual(len(services1), 2)
 | 
				
			||||||
 | 
					        self.assertEqual(mock_metadata.list_all_entities.call_count, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Second access - should use cache (no additional call)
 | 
				
			||||||
 | 
					        services2 = source.database_services
 | 
				
			||||||
 | 
					        self.assertEqual(len(services2), 2)
 | 
				
			||||||
 | 
					        self.assertEqual(mock_metadata.list_all_entities.call_count, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Verify same object is returned (cached)
 | 
				
			||||||
 | 
					        self.assertIs(services1, services2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_messaging_services_property_caches_results(self, mock_parent_init):
 | 
				
			||||||
 | 
					        """Test that messaging_services property caches results"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        mock_msg_services = [
 | 
				
			||||||
 | 
					            self._create_mock_messaging_service(
 | 
				
			||||||
 | 
					                "kafka-prod", "broker1:9092,broker2:9092"
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					            self._create_mock_messaging_service("kafka-dev", "localhost:9092"),
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter(mock_msg_services)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					        source.metadata = (
 | 
				
			||||||
 | 
					            mock_metadata  # Set metadata manually since parent __init__ is mocked
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # First access - should call list_all_entities
 | 
				
			||||||
 | 
					        services1 = source.messaging_services
 | 
				
			||||||
 | 
					        self.assertEqual(len(services1), 2)
 | 
				
			||||||
 | 
					        self.assertEqual(mock_metadata.list_all_entities.call_count, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Second access - should use cache
 | 
				
			||||||
 | 
					        services2 = source.messaging_services
 | 
				
			||||||
 | 
					        self.assertEqual(len(services2), 2)
 | 
				
			||||||
 | 
					        self.assertEqual(mock_metadata.list_all_entities.call_count, 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Verify same object is returned (cached)
 | 
				
			||||||
 | 
					        self.assertIs(services1, services2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestServiceDiscovery(TestCase):
 | 
				
			||||||
 | 
					    """Test database and messaging service discovery"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _create_mock_db_service(self, name, service_type, host_port):
 | 
				
			||||||
 | 
					        """Helper to create a mock database service"""
 | 
				
			||||||
 | 
					        service = Mock(spec=DatabaseService)
 | 
				
			||||||
 | 
					        service.name = Mock()
 | 
				
			||||||
 | 
					        service.name.root = name
 | 
				
			||||||
 | 
					        service.serviceType = Mock()
 | 
				
			||||||
 | 
					        service.serviceType.value = service_type
 | 
				
			||||||
 | 
					        service.connection = Mock()
 | 
				
			||||||
 | 
					        service.connection.config = Mock()
 | 
				
			||||||
 | 
					        service.connection.config.hostPort = host_port
 | 
				
			||||||
 | 
					        return service
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_find_database_service_by_hostname_matches_correctly(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test finding database service by hostname with port stripping"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        mock_db_services = [
 | 
				
			||||||
 | 
					            self._create_mock_db_service(
 | 
				
			||||||
 | 
					                "mysql-prod", "Mysql", "mysql.example.com:3306"
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					            self._create_mock_db_service(
 | 
				
			||||||
 | 
					                "postgres-prod", "Postgres", "postgres.example.com:5432"
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter(mock_db_services)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					        source.metadata = (
 | 
				
			||||||
 | 
					            mock_metadata  # Set metadata manually since parent __init__ is mocked
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Test matching MySQL service
 | 
				
			||||||
 | 
					        result = source.find_database_service_by_hostname(
 | 
				
			||||||
 | 
					            "Mysql", "mysql.example.com:3306"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, "mysql-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Test matching with protocol prefix
 | 
				
			||||||
 | 
					        result = source.find_database_service_by_hostname(
 | 
				
			||||||
 | 
					            "Mysql", "jdbc:mysql://mysql.example.com:3306/db"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, "mysql-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_find_messaging_service_by_brokers_matches_correctly(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test finding messaging service by broker endpoints"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        kafka_service = Mock(spec=MessagingService)
 | 
				
			||||||
 | 
					        kafka_service.name = Mock()
 | 
				
			||||||
 | 
					        kafka_service.name.root = "kafka-prod"
 | 
				
			||||||
 | 
					        kafka_service.connection = Mock()
 | 
				
			||||||
 | 
					        kafka_service.connection.config = Mock()
 | 
				
			||||||
 | 
					        kafka_service.connection.config.bootstrapServers = (
 | 
				
			||||||
 | 
					            "broker1.example.com:9092,broker2.example.com:9092"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter([kafka_service])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					        source.metadata = (
 | 
				
			||||||
 | 
					            mock_metadata  # Set metadata manually since parent __init__ is mocked
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Test matching with protocol prefix
 | 
				
			||||||
 | 
					        result = source.find_messaging_service_by_brokers(
 | 
				
			||||||
 | 
					            "SASL_SSL://broker1.example.com:9092,SASL_SSL://broker2.example.com:9092"
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.assertEqual(result, "kafka-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Test matching with partial overlap
 | 
				
			||||||
 | 
					        result = source.find_messaging_service_by_brokers("broker1.example.com:9092")
 | 
				
			||||||
 | 
					        self.assertEqual(result, "kafka-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestTopicSearchByPrefix(TestCase):
 | 
				
			||||||
 | 
					    """Test topic search by prefix fallback mechanism"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_search_topics_by_prefix_finds_matching_topics(self, mock_parent_init):
 | 
				
			||||||
 | 
					        """Test searching for topics by database.server.name prefix"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Create mock topics
 | 
				
			||||||
 | 
					        topic1 = Mock()
 | 
				
			||||||
 | 
					        topic1.name = Mock()
 | 
				
			||||||
 | 
					        topic1.name.root = "myserver.public.users"
 | 
				
			||||||
 | 
					        topic1.fullyQualifiedName = Mock()
 | 
				
			||||||
 | 
					        topic1.fullyQualifiedName.root = 'kafka-prod."myserver.public.users"'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        topic2 = Mock()
 | 
				
			||||||
 | 
					        topic2.name = Mock()
 | 
				
			||||||
 | 
					        topic2.name.root = "myserver.public.orders"
 | 
				
			||||||
 | 
					        topic2.fullyQualifiedName = Mock()
 | 
				
			||||||
 | 
					        topic2.fullyQualifiedName.root = 'kafka-prod."myserver.public.orders"'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        topic3 = Mock()
 | 
				
			||||||
 | 
					        topic3.name = Mock()
 | 
				
			||||||
 | 
					        topic3.name.root = "other.topic"
 | 
				
			||||||
 | 
					        topic3.fullyQualifiedName = Mock()
 | 
				
			||||||
 | 
					        topic3.fullyQualifiedName.root = "kafka-prod.other.topic"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter([topic1, topic2, topic3])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					        source.metadata = (
 | 
				
			||||||
 | 
					            mock_metadata  # Set metadata manually since parent __init__ is mocked
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Search for topics with prefix "myserver"
 | 
				
			||||||
 | 
					        result = source._search_topics_by_prefix("myserver", "kafka-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should find only topics starting with "myserver."
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 2)
 | 
				
			||||||
 | 
					        self.assertEqual(result[0].name, "myserver.public.users")
 | 
				
			||||||
 | 
					        self.assertEqual(result[1].name, "myserver.public.orders")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Verify FQNs are populated
 | 
				
			||||||
 | 
					        self.assertEqual(result[0].fqn, 'kafka-prod."myserver.public.users"')
 | 
				
			||||||
 | 
					        self.assertEqual(result[1].fqn, 'kafka-prod."myserver.public.orders"')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_search_topics_by_prefix_returns_empty_when_none_match(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test that search returns empty list when no topics match"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        topic = Mock()
 | 
				
			||||||
 | 
					        topic.name = Mock()
 | 
				
			||||||
 | 
					        topic.name.root = "other.topic"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata.list_all_entities.return_value = iter([topic])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Search for topics with prefix that doesn't exist
 | 
				
			||||||
 | 
					        result = source._search_topics_by_prefix("nonexistent", "kafka-prod")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_search_topics_by_prefix_handles_no_messaging_service(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test that search handles None messaging service gracefully"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Search without messaging service name
 | 
				
			||||||
 | 
					        result = source._search_topics_by_prefix("myserver", None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should return empty list
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestCDCTopicFallback(TestCase):
 | 
				
			||||||
 | 
					    """Test CDC topic parsing with table.include.list fallback"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_parse_cdc_topics_from_config_with_table_include_list(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test parsing topics from table.include.list"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        pipeline_details = KafkaConnectPipelineDetails(
 | 
				
			||||||
 | 
					            name="test-connector",
 | 
				
			||||||
 | 
					            config={
 | 
				
			||||||
 | 
					                "connector.class": "io.debezium.connector.mysql.MySqlConnector",
 | 
				
			||||||
 | 
					                "database.server.name": "myserver",
 | 
				
			||||||
 | 
					                "table.include.list": "public.users,public.orders,inventory.products",
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        result = source._parse_cdc_topics_from_config(pipeline_details, "myserver")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should create topics for each table
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 3)
 | 
				
			||||||
 | 
					        self.assertEqual(result[0].name, "myserver.public.users")
 | 
				
			||||||
 | 
					        self.assertEqual(result[1].name, "myserver.public.orders")
 | 
				
			||||||
 | 
					        self.assertEqual(result[2].name, "myserver.inventory.products")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_parse_cdc_topics_returns_empty_without_table_include_list(
 | 
				
			||||||
 | 
					        self, mock_parent_init
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        """Test that parsing returns empty when table.include.list is missing"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        pipeline_details = KafkaConnectPipelineDetails(
 | 
				
			||||||
 | 
					            name="test-connector",
 | 
				
			||||||
 | 
					            config={
 | 
				
			||||||
 | 
					                "connector.class": "io.debezium.connector.mysql.MySqlConnector",
 | 
				
			||||||
 | 
					                "database.server.name": "myserver",
 | 
				
			||||||
 | 
					                # No table.include.list
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        with self.assertLogs(level="WARNING") as log:
 | 
				
			||||||
 | 
					            result = source._parse_cdc_topics_from_config(pipeline_details, "myserver")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should return empty list
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should log warning about missing table.include.list
 | 
				
			||||||
 | 
					        self.assertTrue(any("table.include.list" in message for message in log.output))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @patch(
 | 
				
			||||||
 | 
					        "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.__init__"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_parse_cdc_topics_supports_table_whitelist_legacy(self, mock_parent_init):
 | 
				
			||||||
 | 
					        """Test that table.whitelist (legacy key) is also supported"""
 | 
				
			||||||
 | 
					        mock_parent_init.return_value = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_config = Mock()
 | 
				
			||||||
 | 
					        mock_config.serviceConnection.root.config.hostPort = "http://localhost:8083"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        mock_metadata = Mock()
 | 
				
			||||||
 | 
					        source = KafkaconnectSource(mock_config, mock_metadata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        pipeline_details = KafkaConnectPipelineDetails(
 | 
				
			||||||
 | 
					            name="test-connector",
 | 
				
			||||||
 | 
					            config={
 | 
				
			||||||
 | 
					                "connector.class": "io.debezium.connector.mysql.MySqlConnector",
 | 
				
			||||||
 | 
					                "database.server.name": "myserver",
 | 
				
			||||||
 | 
					                "table.whitelist": "public.users",  # Legacy key
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        result = source._parse_cdc_topics_from_config(pipeline_details, "myserver")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Should parse from legacy key
 | 
				
			||||||
 | 
					        self.assertEqual(len(result), 1)
 | 
				
			||||||
 | 
					        self.assertEqual(result[0].name, "myserver.public.users")
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user