Mirror of https://github.com/open-metadata/OpenMetadata.git (synced 2025-10-03 04:46:27 +00:00)

Co-authored-by: ulixius9 <mayursingal9@gmail.com>
parent: ad0c6dad88
commit: 710e782f33
@@ -94,12 +94,7 @@ plugins: Dict[str, Set[str]] = {
         "thrift-sasl==0.4.3",
         "presto-types-parser==0.0.2",
     },
-    "kafka": {
-        "confluent_kafka==1.8.2",
-        "fastavro>=1.2.0",
-        "avro-python3",
-        "confluent_avro",
-    },
+    "kafka": {"confluent_kafka==1.8.2", "fastavro>=1.2.0", "avro-python3"},
     "ldap-users": {"ldap3==2.9.1"},
     "looker": {"looker-sdk>=22.4.0"},
     "mssql": {"sqlalchemy-pytds>=0.3"},
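For context: this plugins mapping is presumably what feeds setuptools' extras_require (the setup() wiring itself is not part of this diff), so dropping confluent_avro changes what an install of the kafka extra pulls in. A minimal sketch of that wiring, under that assumption:

    # Hypothetical setup.py wiring for the plugins dict above (sketch only).
    from typing import Dict, Set
    from setuptools import setup

    plugins: Dict[str, Set[str]] = {
        "kafka": {"confluent_kafka==1.8.2", "fastavro>=1.2.0", "avro-python3"},
    }

    setup(
        name="openmetadata-ingestion",  # assumed distribution name
        extras_require={name: list(deps) for name, deps in plugins.items()},
    )

With that wiring, pip install "openmetadata-ingestion[kafka]" would now install only the three remaining pinned packages.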
@@ -17,12 +17,8 @@ from dataclasses import dataclass, field
 from typing import Iterable, List, Optional
 
 import confluent_kafka
-from confluent_avro import AvroKeyValueSerde, SchemaRegistry
 from confluent_kafka.admin import AdminClient, ConfigResource
-from confluent_kafka.schema_registry.schema_registry_client import (
-    Schema,
-    SchemaRegistryClient,
-)
+from confluent_kafka.schema_registry.schema_registry_client import Schema
 
 from metadata.generated.schema.api.data.createTopic import CreateTopicRequest
 from metadata.generated.schema.entity.data.topic import (
@@ -190,7 +186,6 @@ class KafkaSource(Source[CreateTopicRequest]):
             self.status.failure(topic_name, repr(err))
 
     def _parse_topic_metadata(self, topic: str) -> Optional[Schema]:
-        logger.debug(f"topic = {topic}")
        schema: Optional[Schema] = None
         try:
             registered_schema = self.schema_registry_client.get_latest_version(
@@ -203,31 +198,31 @@ class KafkaSource(Source[CreateTopicRequest]):
         return schema
 
     def _get_sample_data(self, topic_name):
+        sample_data = []
         try:
             self.consumer_client.subscribe([topic_name.__root__])
-            registry_client = SchemaRegistry(
-                self.service_connection.schemaRegistryURL,
-                headers={"Content-Type": "application/vnd.schemaregistry.v1+json"},
-            )
-            avro_serde = AvroKeyValueSerde(registry_client, topic_name.__root__)
-            logger.info("Kafka consumer polling for sample messages")
+            logger.info(
+                f"Kafka consumer polling for sample messages in topic {topic_name.__root__}"
+            )
             messages = self.consumer_client.consume(num_messages=10, timeout=10)
-            sample_data = []
-            if len(messages) > 0:
-                for message in messages:
-                    sample_data.append(
-                        str(avro_serde.value.deserialize(message.value()))
-                    )
-            topic_sample_data = TopicSampleData(messages=sample_data)
-            self.consumer_client.unsubscribe()
-            return topic_sample_data
         except Exception as e:
             logger.error(
                 f"Failed to fetch sample data from topic {topic_name.__root__}"
             )
             logger.error(traceback.format_exc())
             logger.error(sys.exc_info()[2])
-        return None
+        else:
+            if messages:
+                for message in messages:
+                    sample_data.append(
+                        str(
+                            self.consumer_client._serializer.decode_message(
+                                message.value()
+                            )
+                        )
+                    )
+        self.consumer_client.unsubscribe()
+        return TopicSampleData(messages=sample_data)
 
     def on_assign(self, a_consumer, partitions):
         # get offset tuple from the first partition
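The rewritten _get_sample_data drops the third-party confluent_avro serde in favor of confluent_kafka's own AvroConsumer. One subtlety worth noting: AvroConsumer only overrides poll(), so the batch consume() call used here returns raw, undecoded messages, which is why the value bytes are pushed through the consumer's internal _serializer by hand. A minimal standalone sketch of the same pattern, assuming a reachable broker and schema registry (the endpoints and topic name are illustrative):

    from confluent_kafka.avro import AvroConsumer

    consumer = AvroConsumer(
        {
            "bootstrap.servers": "localhost:9092",  # assumed broker
            "group.id": "openmetadata-consumer",
            "auto.offset.reset": "earliest",
            "schema.registry.url": "http://localhost:8081",  # assumed registry
        }
    )
    consumer.subscribe(["my-topic"])  # hypothetical topic

    # consume() bypasses AvroConsumer's poll()-level decoding, so each
    # Avro-encoded value is decoded explicitly via the internal serializer.
    messages = consumer.consume(num_messages=10, timeout=10)
    sample_data = [
        str(consumer._serializer.decode_message(msg.value())) for msg in messages
    ]
    consumer.unsubscribe()

Relying on the private _serializer attribute mirrors the diff itself; the public alternative would be calling poll() in a loop, which decodes one message at a time.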
@@ -269,7 +269,6 @@ def _(connection: KafkaConnection, verbose: bool = False) -> KafkaClient:
     from confluent_kafka.admin import AdminClient, ConfigResource
     from confluent_kafka.avro import AvroConsumer
     from confluent_kafka.schema_registry.schema_registry_client import (
-        Schema,
         SchemaRegistryClient,
     )
 
@@ -282,11 +281,22 @@ def _(connection: KafkaConnection, verbose: bool = False) -> KafkaClient:
     if connection.schemaRegistryURL:
         connection.schemaRegistryConfig["url"] = connection.schemaRegistryURL
         schema_registry_client = SchemaRegistryClient(connection.schemaRegistryConfig)
-        admin_client_config["schema.registry.url"] = connection.schemaRegistryURL
-        admin_client_config["group.id"] = "openmetadata-consumer-1"
-        admin_client_config["auto.offset.reset"] = "earliest"
-        admin_client_config["enable.auto.commit"] = False
-        consumer_client = AvroConsumer(admin_client_config)
+        connection.schemaRegistryConfig["url"] = str(connection.schemaRegistryURL)
+        consumer_config = {
+            **connection.consumerConfig,
+            "bootstrap.servers": connection.bootstrapServers,
+        }
+        if "group.id" not in consumer_config:
+            consumer_config["group.id"] = "openmetadata-consumer"
+        if "auto.offset.reset" not in consumer_config:
+            consumer_config["auto.offset.reset"] = "earliest"
+
+        for key in connection.schemaRegistryConfig:
+            consumer_config["schema.registry." + key] = connection.schemaRegistryConfig[
+                key
+            ]
+        logger.debug(consumer_config)
+        consumer_client = AvroConsumer(consumer_config)
 
     return KafkaClient(
         admin_client=admin_client,
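To make the merge above concrete, here is roughly what consumer_config ends up holding for a typical connection (the input values are illustrative assumptions, and setdefault stands in for the diff's membership checks):

    # Assumed inputs, standing in for the KafkaConnection fields.
    connection_consumer_config = {"session.timeout.ms": 6000}
    bootstrap_servers = "localhost:9092"
    schema_registry_config = {"url": "http://localhost:8081"}

    consumer_config = {
        **connection_consumer_config,
        "bootstrap.servers": bootstrap_servers,
    }
    consumer_config.setdefault("group.id", "openmetadata-consumer")
    consumer_config.setdefault("auto.offset.reset", "earliest")
    for key, value in schema_registry_config.items():
        consumer_config["schema.registry." + key] = value

    # consumer_config is now:
    # {"session.timeout.ms": 6000,
    #  "bootstrap.servers": "localhost:9092",
    #  "group.id": "openmetadata-consumer",
    #  "auto.offset.reset": "earliest",
    #  "schema.registry.url": "http://localhost:8081"}

User-supplied consumerConfig keys win over the group.id and auto.offset.reset defaults, while bootstrap.servers and the schema.registry.* keys are always taken from the connection.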