mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-15 12:37:18 +00:00
Ingestion: Add Confluent Kafka topic and schema connector
This commit is contained in:
parent
dc7e05dd74
commit
4f6cc54465
@ -79,7 +79,7 @@ plugins: Dict[str, Set[str]] = {
|
|||||||
"bigquery-usage": {"google-cloud-logging", "cachetools"},
|
"bigquery-usage": {"google-cloud-logging", "cachetools"},
|
||||||
"elasticsearch": {"elasticsearch~=7.13.1"},
|
"elasticsearch": {"elasticsearch~=7.13.1"},
|
||||||
"hive": {"pyhive~=0.6.3", "thrift~=0.13.0", "sasl==0.3.1", "thrift-sasl==0.4.3"},
|
"hive": {"pyhive~=0.6.3", "thrift~=0.13.0", "sasl==0.3.1", "thrift-sasl==0.4.3"},
|
||||||
"kafka": {"confluent_kafka>=1.5.0", "fastavro>=1.2.0"},
|
"kafka": {"confluent_kafka>=1.7.0", "fastavro>=1.2.0"},
|
||||||
"ldap-users": {"ldap3==2.9.1"},
|
"ldap-users": {"ldap3==2.9.1"},
|
||||||
"mssql": {"sqlalchemy-pytds>=0.3"},
|
"mssql": {"sqlalchemy-pytds>=0.3"},
|
||||||
"mssql-odbc": {"pyodbc"},
|
"mssql-odbc": {"pyodbc"},
|
||||||
|
@ -8,6 +8,7 @@ from fastavro import json_reader
|
|||||||
from fastavro import parse_schema
|
from fastavro import parse_schema
|
||||||
|
|
||||||
import confluent_kafka
|
import confluent_kafka
|
||||||
|
from confluent_kafka.admin import AdminClient, ConfigResource
|
||||||
from confluent_kafka.schema_registry.schema_registry_client import (
|
from confluent_kafka.schema_registry.schema_registry_client import (
|
||||||
Schema,
|
Schema,
|
||||||
SchemaRegistryClient,
|
SchemaRegistryClient,
|
||||||
@ -38,7 +39,7 @@ class KafkaSourceConfig(ConfigModel):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class KafkaSource(Source):
|
class KafkaSource(Source):
|
||||||
config: KafkaSourceConfig
|
config: KafkaSourceConfig
|
||||||
consumer: confluent_kafka.Consumer
|
admin_client: AdminClient
|
||||||
report: KafkaSourceStatus
|
report: KafkaSourceStatus
|
||||||
|
|
||||||
def __init__(self, config: KafkaSourceConfig, ctx: WorkflowContext):
|
def __init__(self, config: KafkaSourceConfig, ctx: WorkflowContext):
|
||||||
@ -48,11 +49,9 @@ class KafkaSource(Source):
|
|||||||
self.schema_registry_client = SchemaRegistryClient(
|
self.schema_registry_client = SchemaRegistryClient(
|
||||||
{"url": self.config.schema_registry_url}
|
{"url": self.config.schema_registry_url}
|
||||||
)
|
)
|
||||||
self.consumer = confluent_kafka.Consumer(
|
self.admin_client = AdminClient(
|
||||||
{
|
{
|
||||||
"group.id": "test",
|
|
||||||
"bootstrap.servers": self.config.bootstrap_servers,
|
"bootstrap.servers": self.config.bootstrap_servers,
|
||||||
**self.config.consumer_config,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,10 +64,13 @@ class KafkaSource(Source):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def next_record(self) -> Iterable[Record]:
|
def next_record(self) -> Iterable[Record]:
|
||||||
topics = self.consumer.list_topics().topics
|
topics = self.admin_client.list_topics().topics
|
||||||
for t in topics:
|
for t in topics:
|
||||||
if self.config.filter_pattern.included(t):
|
if self.config.filter_pattern.included(t):
|
||||||
topic_schema = self._parse_topic_metadata(t)
|
topic_schema = self._parse_topic_metadata(t)
|
||||||
|
#resources = [ConfigResource(confluent_kafka.admin.RESOURCE_TOPIC, t)]
|
||||||
|
#topic_config = self.admin_client.describe_configs(resources)
|
||||||
|
#logger.info(topic_config)
|
||||||
self.status.topic_scanned(t)
|
self.status.topic_scanned(t)
|
||||||
yield topic_schema
|
yield topic_schema
|
||||||
else:
|
else:
|
||||||
@ -91,7 +93,7 @@ class KafkaSource(Source):
|
|||||||
fields: List[str] = []
|
fields: List[str] = []
|
||||||
if schema and schema.schema_type == "AVRO":
|
if schema and schema.schema_type == "AVRO":
|
||||||
# "value.id" or "value.[type=string]id"
|
# "value.id" or "value.[type=string]id"
|
||||||
parsed_schema = parse_schema(schema.schema_str)
|
logger.info(schema.schema_str)
|
||||||
elif schema is not None:
|
elif schema is not None:
|
||||||
self.status.warning(
|
self.status.warning(
|
||||||
topic,
|
topic,
|
||||||
@ -126,5 +128,5 @@ class KafkaSource(Source):
|
|||||||
return self.status
|
return self.status
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.consumer:
|
if self.admin_client:
|
||||||
self.consumer.close()
|
self.admin_client.close()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user