46 lines
1.5 KiB
Python
Raw Normal View History

2021-01-31 22:40:30 -08:00
from gometa.configuration import ConfigModel, KafkaConnectionConfig
from gometa.ingestion.api.source import Source, Extractor
from gometa.ingestion.api.source import WorkUnit
from typing import Optional
from dataclasses import dataclass
import confluent_kafka
import re
from gometa.ingestion.api.closeable import Closeable
2021-02-01 11:24:52 -08:00
2021-01-31 22:40:30 -08:00
class KafkaSourceConfig(ConfigModel):
    """Settings accepted by the Kafka source.

    Both fields are optional and carry defaults, so an empty configuration
    is valid.
    """

    # Connection settings; a default-constructed KafkaConnectionConfig is used
    # when the caller supplies none.
    connection: Optional[KafkaConnectionConfig] = KafkaConnectionConfig()

    # Regular expression matched against topic names.
    # The default is a wildcard subscription.
    topic: Optional[str] = ".*"
2021-01-31 22:40:30 -08:00
@dataclass
class KafkaWorkUnit(WorkUnit):
    """Work unit carrying the Kafka source configuration it was built from."""

    config: KafkaSourceConfig

    def get_metadata(self):
        """Return this work unit's configuration via its ``dict()`` method."""
        metadata = self.config.dict()
        return metadata
class KafkaSource(Source):
    """Source that yields one work unit per Kafka topic matching a pattern.

    Typical use: ``configure(...)``, iterate ``get_workunits()``, then
    ``close()``.
    """

    def __init__(self):
        # Define every attribute up front so close() is safe to call even when
        # configure() never ran or failed part-way through.
        self.source_config = None
        self.topic_pattern = None
        self.consumer = None

    def configure(self, config_dict: dict):
        """Parse *config_dict* and create the Kafka consumer.

        Args:
            config_dict: Raw settings, parsed into a ``KafkaSourceConfig``.

        Returns:
            This source, so calls can be chained.
        """
        self.source_config = KafkaSourceConfig.parse_obj(config_dict)

        # Both fields are declared Optional; fall back to the declared
        # defaults when they are explicitly None instead of crashing in
        # re.compile() or on the attribute access below.
        topic = self.source_config.topic
        if topic is None:
            topic = ".*"
        connection = self.source_config.connection
        if connection is None:
            connection = KafkaConnectionConfig()

        self.topic_pattern = re.compile(topic)
        self.consumer = confluent_kafka.Consumer(
            {
                'group.id': 'test',
                'bootstrap.servers': connection.bootstrap,
            }
        )
        return self

    def get_workunits(self):
        """Yield a ``KafkaWorkUnit`` for every selected topic.

        A topic is selected when its whole name matches the configured
        pattern and it does not start with an underscore. Each work unit gets
        the id ``kafka-<topic>`` and a config narrowed to that single topic.
        """
        topics = self.consumer.list_topics().topics
        for topic_name in topics:
            # TODO: topics config should support allow and deny patterns
            # Underscore-prefixed topics are skipped (presumably internal
            # topics -- confirm).
            if self.topic_pattern.fullmatch(topic_name) and not topic_name.startswith("_"):
                yield KafkaWorkUnit(
                    id=f'kafka-{topic_name}',
                    config=KafkaSourceConfig(
                        connection=self.source_config.connection,
                        topic=topic_name,
                    ),
                )

    def close(self):
        """Close the consumer if one was created; repeated calls are no-ops."""
        # Detach first so a second close() does not touch a closed consumer.
        consumer, self.consumer = self.consumer, None
        if consumer is not None:
            consumer.close()