2021-01-31 22:40:30 -08:00
|
|
|
from gometa.configuration import ConfigModel, KafkaConnectionConfig
|
|
|
|
from gometa.ingestion.api.source import Source, Extractor
|
|
|
|
from gometa.ingestion.api.source import WorkUnit
|
|
|
|
from typing import Optional
|
|
|
|
from dataclasses import dataclass
|
|
|
|
import confluent_kafka
|
|
|
|
import re
|
|
|
|
from gometa.ingestion.api.closeable import Closeable
|
|
|
|
|
2021-02-01 11:24:52 -08:00
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
class KafkaSourceConfig(ConfigModel):
    """Settings for the Kafka source: broker connection plus topic selection."""

    # Broker connection settings; defaults to a default-constructed
    # KafkaConnectionConfig when the caller supplies none.
    connection: Optional[KafkaConnectionConfig] = KafkaConnectionConfig()

    # Topic selector. KafkaSource.configure() compiles this value with
    # re.compile, so it is interpreted as a regular expression.
    topic: Optional[str] = ".*"  # default is wildcard subscription
|
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
|
|
|
|
@dataclass
class KafkaWorkUnit(WorkUnit):
    """Work unit that carries the Kafka source configuration it was created from."""

    # Configuration describing the connection and topic for this unit.
    config: KafkaSourceConfig

    def get_metadata(self):
        """Return this work unit's configuration as produced by ``config.dict()``."""
        metadata = self.config.dict()
        return metadata
|
|
|
|
|
|
|
|
class KafkaSource(Source):
    """Source that yields one KafkaWorkUnit per matching Kafka topic.

    Typical use: ``KafkaSource().configure(config_dict)``, iterate
    ``get_workunits()``, then call ``close()``.
    """

    def __init__(self):
        # These are populated by configure(). They are initialised here so
        # that close() is safe on an instance that was never configured
        # (previously this raised AttributeError on self.consumer).
        self.source_config = None
        self.topic_pattern = None
        self.consumer = None

    def configure(self, config_dict: dict):
        """Parse ``config_dict``, compile the topic pattern and create the consumer.

        Args:
            config_dict: Raw settings, parsed via ``KafkaSourceConfig.parse_obj``.

        Returns:
            This instance, so calls can be chained.
        """
        self.source_config = KafkaSourceConfig.parse_obj(config_dict)
        self.topic_pattern = re.compile(self.source_config.topic)
        # NOTE(review): 'group.id' is hard-coded to 'test'; presumably this
        # should come from configuration -- confirm before relying on it.
        self.consumer = confluent_kafka.Consumer(
            {
                'group.id': 'test',
                'bootstrap.servers': self.source_config.connection.bootstrap,
            }
        )
        return self

    def get_workunits(self):
        """Yield a KafkaWorkUnit for every topic that passes the filters.

        A topic is emitted when its full name matches the configured pattern
        and the name does not start with an underscore. configure() must have
        been called first.
        """
        topics = self.consumer.list_topics().topics
        for t in topics:
            # The whole topic name must match, not just a prefix.
            if not self.topic_pattern.fullmatch(t):
                continue
            # TODO: topics config should support allow and deny patterns
            if t.startswith("_"):
                continue
            unit_config = KafkaSourceConfig(connection=self.source_config.connection, topic=t)
            yield KafkaWorkUnit(id=f'kafka-{t}', config=unit_config)

    def close(self):
        """Close the underlying consumer, if any. Safe to call more than once."""
        if self.consumer is not None:
            self.consumer.close()
            # Drop the reference so a repeated close() is a no-op instead of
            # calling close() on an already-closed consumer.
            self.consumer = None
|
|
|
|
|