datahub/metadata-ingestion/tests/unit/test_kafka_source.py

82 lines
3.2 KiB
Python
Raw Normal View History

2021-02-11 23:14:20 -08:00
import unittest
from unittest.mock import MagicMock, patch
from gometa.ingestion.api.common import PipelineContext
2021-01-31 22:40:30 -08:00
from gometa.ingestion.source.kafka import KafkaSource
2021-02-10 14:53:55 -08:00
from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
2021-01-31 22:40:30 -08:00
class KafkaSourceTest(unittest.TestCase):
    """Unit tests for ``KafkaSource`` with the Kafka ``Consumer`` class patched.

    Each test replaces ``confluent_kafka.Consumer`` (as referenced from the
    ``gometa.ingestion.source.kafka`` module) with a mock, so no broker is
    contacted; the mock is passed to the test as ``mock_kafka``.
    """

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_configuration(self, mock_kafka):
        """Creating and closing a source builds the consumer exactly once."""
        pipeline_ctx = PipelineContext(run_id="test")
        config = {"connection": {"bootstrap": "foobar:9092"}}

        source = KafkaSource.create(config, pipeline_ctx)
        source.close()

        assert mock_kafka.call_count == 1

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_workunits_wildcard_topic(self, mock_kafka):
        """With no topic pattern configured, each listed topic yields a work unit."""
        consumer = mock_kafka.return_value
        cluster_metadata = MagicMock()
        cluster_metadata.topics = ["foobar", "bazbaz"]
        # The source is expected to discover topics through list_topics().
        consumer.list_topics.return_value = cluster_metadata

        pipeline_ctx = PipelineContext(run_id="test")
        config = {"connection": {"bootstrap": "localhost:9092"}}
        source = KafkaSource.create(config, pipeline_ctx)

        emitted = list(source.get_workunits())

        # The first work unit must carry a MetadataChangeEvent under "mce".
        leading_mce = emitted[0].get_metadata()["mce"]
        assert isinstance(leading_mce, MetadataChangeEvent)

        mock_kafka.assert_called_once()
        consumer.list_topics.assert_called_once()
        assert len(emitted) == 2

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_kafka_source_workunits_topic_pattern(self, mock_kafka):
        """Only topics matching the ``allow`` patterns produce work units."""
        consumer = mock_kafka.return_value
        cluster_metadata = MagicMock()
        cluster_metadata.topics = ["test", "foobar", "bazbaz"]
        consumer.list_topics.return_value = cluster_metadata

        # Case 1: the pattern "test" selects one of the three topics.
        exact_ctx = PipelineContext(run_id="test1")
        exact_config = {
            "topic_patterns": {"allow": ["test"]},
            "connection": {"bootstrap": "localhost:9092"},
        }
        source = KafkaSource.create(exact_config, exact_ctx)

        emitted = list(source.get_workunits())
        mock_kafka.assert_called_once()
        consumer.list_topics.assert_called_once()
        assert len(emitted) == 1

        # Case 2: the same metadata mock now reports a different topic list,
        # and the pattern "test.*" selects both "test" and "test2".
        cluster_metadata.topics = ["test", "test2", "bazbaz"]
        regex_ctx = PipelineContext(run_id="test2")
        regex_config = {
            "topic_patterns": {"allow": ["test.*"]},
            "connection": {"bootstrap": "localhost:9092"},
        }
        source = KafkaSource.create(regex_config, regex_ctx)

        emitted = list(source.get_workunits())
        assert len(emitted) == 2

    @patch("gometa.ingestion.source.kafka.confluent_kafka.Consumer")
    def test_close(self, mock_kafka):
        """Closing the source closes the underlying consumer exactly once."""
        consumer = mock_kafka.return_value

        pipeline_ctx = PipelineContext(run_id="test")
        config = {"topic": "test", "connection": {"bootstrap": "localhost:9092"}}
        source = KafkaSource.create(config, pipeline_ctx)

        source.close()

        assert consumer.close.call_count == 1