# Source: datahub/metadata-ingestion/tests/unit/test_kafka_source.py
# (64 lines, 2.8 KiB, Python)

from gometa.ingestion.api.common import PipelineContext
# (blame timestamp: 2021-01-31 22:40:30 -08:00)
from gometa.ingestion.source.kafka import KafkaSource
import unittest
from unittest.mock import patch, MagicMock
class KafkaSourceTest(unittest.TestCase):
    """Unit tests for ``KafkaSource`` with the Kafka consumer class mocked.

    Every test patches ``confluent_kafka.Consumer`` as looked up through
    ``gometa.ingestion.source.kafka``; the mock class is injected as the
    ``mock_kafka`` argument and ``mock_kafka.return_value`` stands in for the
    consumer instance the source creates.
    """

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_configuration(self, mock_kafka):
        """Creating a source instantiates the consumer exactly once."""
        ctx = PipelineContext(run_id='test')
        KafkaSource.create({'connection': {'bootstrap': 'foobar'}}, ctx)

        mock_kafka.assert_called_once()

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_workunits_wildcard_topic(self, mock_kafka):
        """With no ``topic`` configured, each listed topic yields a work unit."""
        mock_kafka_instance = mock_kafka.return_value
        mock_cluster_metadata = MagicMock()
        mock_cluster_metadata.topics = ["foobar", "bazbaz"]
        mock_kafka_instance.list_topics.return_value = mock_cluster_metadata

        ctx = PipelineContext(run_id='test')
        kafka_source = KafkaSource.create(
            {'connection': {'bootstrap': 'localhost:9092'}}, ctx
        )
        workunits = list(kafka_source.get_workunits())

        mock_kafka.assert_called_once()
        mock_kafka_instance.list_topics.assert_called_once()
        # Check the count before indexing so an empty result is reported as
        # an assertion failure rather than an IndexError.
        self.assertEqual(len(workunits), 2)
        self.assertEqual(workunits[0].get_metadata()['topic'], 'foobar')

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_workunits_topic_pattern(self, mock_kafka):
        """The ``topic`` setting restricts which listed topics yield work units."""
        mock_kafka_instance = mock_kafka.return_value
        mock_cluster_metadata = MagicMock()
        mock_cluster_metadata.topics = ["test", "foobar", "bazbaz"]
        mock_kafka_instance.list_topics.return_value = mock_cluster_metadata

        # Exact topic name: only "test" out of the three listed topics.
        ctx = PipelineContext(run_id='test1')
        kafka_source = KafkaSource.create(
            {'topic': 'test', 'connection': {'bootstrap': 'localhost:9092'}}, ctx
        )
        self.assertEqual(kafka_source.source_config.topic, "test")
        workunits = list(kafka_source.get_workunits())

        mock_kafka.assert_called_once()
        mock_kafka_instance.list_topics.assert_called_once()
        self.assertEqual(len(workunits), 1)

        # Pattern "test.*": expected to select "test" and "test2" but not
        # "bazbaz". The same mocked metadata object is reused with a new
        # topic list for the second source.
        mock_cluster_metadata.topics = ["test", "test2", "bazbaz"]
        ctx = PipelineContext(run_id='test2')
        kafka_source = KafkaSource.create(
            {'topic': 'test.*', 'connection': {'bootstrap': 'localhost:9092'}}, ctx
        )
        workunits = list(kafka_source.get_workunits())

        self.assertEqual(len(workunits), 2)

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_close(self, mock_kafka):
        """Closing the source closes the consumer instance exactly once."""
        mock_kafka_instance = mock_kafka.return_value
        ctx = PipelineContext(run_id='test')
        kafka_source = KafkaSource.create(
            {'topic': 'test', 'connection': {'bootstrap': 'localhost:9092'}}, ctx
        )

        kafka_source.close()

        mock_kafka_instance.close.assert_called_once()